List of usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:cn.org.once.cstack.model.Snapshot.java
/**
 * Stores a normalized form of the given application name.
 *
 * <p>The name is lower-cased, decomposed with NFD so that accented letters split into a base
 * letter followed by combining marks, and then stripped of every character that is not in
 * {@code a-z} or {@code 0-9}.
 *
 * @param applicationName the raw application name; must not be {@code null}
 */
public void setApplicationName(String applicationName) {
    // Locale.ROOT keeps the result independent of the JVM default locale. With a Turkish
    // default locale, "I" would lower-case to dotless i (U+0131) and be stripped below.
    String lowered = applicationName.toLowerCase(java.util.Locale.ROOT);
    String decomposed = Normalizer.normalize(lowered, Normalizer.Form.NFD);
    // Combining diacritical marks are outside [a-z0-9], so this single pass removes them too.
    this.applicationName = decomposed.replaceAll("[^a-z0-9]", "");
}
From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java
@Override public String getName(URL url) { String address = url.toString(); // get the last part of the URL as the page name String temp[] = address.split("/"); String result = temp[temp.length - 1]; // remove diacritics result = Normalizer.normalize(result, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); return result; }
From source file:fr.insalyon.creatis.vip.datamanager.server.rpc.FileUploadServiceImpl.java
@Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { User user = (User) request.getSession().getAttribute(CoreConstants.SESSION_USER); if (user != null && ServletFileUpload.isMultipartContent(request)) { FileItemFactory factory = new DiskFileItemFactory(); ServletFileUpload upload = new ServletFileUpload(factory); try {//from ww w. j a va 2 s .c om List items = upload.parseRequest(request); Iterator iter = items.iterator(); String fileName = null; FileItem fileItem = null; String path = null; String target = "uploadComplete"; String operationID = "no-id"; while (iter.hasNext()) { FileItem item = (FileItem) iter.next(); if (item.getFieldName().equals("path")) { path = item.getString(); } else if (item.getFieldName().equals("file")) { fileName = item.getName(); fileItem = item; } else if (item.getFieldName().equals("target")) { target = item.getString(); } } if (fileName != null && !fileName.equals("")) { boolean local = path.equals("local") ? 
true : false; String rootDirectory = DataManagerUtil.getUploadRootDirectory(local); fileName = new File(fileName).getName().trim().replaceAll(" ", "_"); fileName = Normalizer.normalize(fileName, Normalizer.Form.NFD) .replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); File uploadedFile = new File(rootDirectory + fileName); try { fileItem.write(uploadedFile); response.getWriter().write(fileName); if (!local) { // GRIDA Pool Client logger.info("(" + user.getEmail() + ") Uploading '" + uploadedFile.getAbsolutePath() + "' to '" + path + "'."); GRIDAPoolClient client = CoreUtil.getGRIDAPoolClient(); operationID = client.uploadFile(uploadedFile.getAbsolutePath(), DataManagerUtil.parseBaseDir(user, path), user.getEmail()); } else { operationID = fileName; logger.info( "(" + user.getEmail() + ") Uploaded '" + uploadedFile.getAbsolutePath() + "'."); } } catch (Exception ex) { logger.error(ex); } } response.setContentType("text/html"); response.setHeader("Pragma", "No-cache"); response.setDateHeader("Expires", 0); response.setHeader("Cache-Control", "no-cache"); PrintWriter out = response.getWriter(); out.println("<html>"); out.println("<body>"); out.println("<script type=\"text/javascript\">"); out.println("if (parent." + target + ") parent." + target + "('" + operationID + "');"); out.println("</script>"); out.println("</body>"); out.println("</html>"); out.flush(); } catch (FileUploadException ex) { logger.error(ex); } } }
From source file:org.haedus.datatypes.phonetic.FeatureModel.java
public String getBestSymbol(List<Double> featureArray) { List<Double> bestFeatures = new ArrayList<Double>(); String bestSymbol = ""; double minimum = Double.MAX_VALUE; for (Map.Entry<String, List<Double>> entry : featureMap.entrySet()) { List<Double> features = entry.getValue(); double difference = getDifferenceValue(featureArray, features); if (difference < minimum) { bestSymbol = entry.getKey(); minimum = difference;//w w w .ja v a2 s . c o m bestFeatures = features; } } String bestDiacritic = ""; if (minimum > 0.0) { bestDiacritic = getBestDiacritic(featureArray, bestFeatures); } return Normalizer.normalize(bestSymbol + bestDiacritic, Normalizer.Form.NFC); }
From source file:pl.edu.icm.cermine.metadata.model.DocumentAffiliation.java
public String getOrganization() { for (Token<AffiliationLabel> token : tokens) { if (token.getLabel().equals(AffiliationLabel.INST)) { return Normalizer.normalize(token.getText(), Normalizer.Form.NFC); }//from w w w .ja va2 s.co m } return null; }
From source file:org.sonar.batch.scm.DefaultBlameOutput.java
/**
 * Decomposes the input with NFD and deletes every match of {@code ACCENT_CODES}.
 *
 * @param inputString the text to process
 * @return the decomposed text with all {@code ACCENT_CODES} matches removed
 */
private static String removeAccents(String inputString) {
    // NOTE(review): ACCENT_CODES is defined elsewhere; presumably it matches combining marks.
    return ACCENT_CODES
            .matcher(Normalizer.normalize(inputString, Normalizer.Form.NFD))
            .replaceAll("");
}
From source file:com.continusec.client.ObjectHash.java
/**
 * Produces the hash bytes for a string value.
 *
 * <p>When {@code r} is non-null and {@code s} starts with it, the rest of {@code s} is decoded
 * as hex and returned directly. Otherwise the result is the SHA-256 digest of the byte
 * {@code 'u'} followed by the UTF-8 bytes of the NFC form of {@code s}.
 *
 * @param s the string to hash
 * @param r an optional prefix marking {@code s} as an already hex-encoded value; may be null
 * @return the decoded or computed hash bytes
 * @throws ContinusecException if hex decoding fails or the encoding is unsupported
 */
private static final byte[] hashString(String s, String r) throws ContinusecException {
    boolean startsWithPrefix = r != null && s.startsWith(r);

    if (!startsWithPrefix) {
        try {
            MessageDigest digest = DigestUtils.getSha256Digest();
            digest.update((byte) 'u');
            byte[] normalizedBytes = Normalizer.normalize(s, Normalizer.Form.NFC).getBytes("UTF8");
            digest.update(normalizedBytes);
            return digest.digest();
        } catch (UnsupportedEncodingException e) {
            throw new InvalidObjectException(e);
        }
    }

    // everything after the prefix is expected to be hex digits
    String hexDigits = s.substring(r.length());
    try {
        return Hex.decodeHex(hexDigits.toCharArray());
    } catch (DecoderException e) {
        throw new InvalidObjectException(e);
    }
}
From source file:biblivre3.z3950.Z3950Client.java
public List<Record> doSearch(final Z3950ServerDTO server, final Z3950SearchDTO search) { List<Record> listRecords = new ArrayList<Record>(); factory.setHost(server.getUrl());//from w ww . j av a2s .co m factory.setPort(server.getPort()); factory.setCharsetEncoding("UTF-8"); factory.setApplicationContext(z3950Context); factory.setDefaultRecordSyntax("usmarc"); factory.setDefaultElementSetName("F"); factory.setDoCharsetNeg(true); factory.getRecordArchetypes().put("Default", "usmarc::F"); factory.getRecordArchetypes().put("FullDisplay", "usmarc::F"); factory.getRecordArchetypes().put("BriefDisplay", "usmarc::B"); final String qry = QUERY_PREFIX + search.getType() + " \"" + TextUtils.removeDiacriticals(search.getValue()) + "\""; IRQuery query = new IRQuery(); query.collections = new Vector(); query.collections.add(server.getCollection()); query.query = new org.jzkit.search.util.QueryModel.PrefixString.PrefixString(qry); try { Searchable s = factory.newSearchable(); s.setApplicationContext(z3950Context); IRResultSet result = s.evaluate(query); // Wait without timeout until result set is complete or failure result.waitForStatus(IRResultSetStatus.COMPLETE | IRResultSetStatus.FAILURE, 0); if (result.getStatus() == IRResultSetStatus.FAILURE) { log.error("IRResultSetStatus == FAILURE"); } if (result.getFragmentCount() == 0) { return listRecords; } String encoding = server.getCharset(); AnselToUnicode atu = new AnselToUnicode(); Enumeration e = new ReadAheadEnumeration(result, new ArchetypeRecordFormatSpecification("Default")); int errorRecords = 0; Record record = null; for (int i = 0; e.hasMoreElements(); i++) { iso2709 o = (iso2709) e.nextElement(); try { String iso = ""; if (encoding.equals("MARC-8")) { iso = Normalizer.normalize( atu.convert(new String((byte[]) o.getOriginalObject(), "ISO-8859-1")), Normalizer.Form.NFC); } else { iso = new String((byte[]) o.getOriginalObject(), encoding); } try { record = MarcUtils.iso2709ToRecordAsIso(iso, false); } catch (Exception 
encodeE) { } if (record == null) { try { record = MarcUtils.iso2709ToRecord(iso, false); } catch (Exception encodeE) { } } if (record == null) { try { record = MarcUtils.iso2709ToRecordAsIso( new String((byte[]) o.getOriginalObject(), "ISO-8859-1"), false); } catch (Exception encodeE) { } } if (record == null) { try { record = MarcUtils.iso2709ToRecord( new String((byte[]) o.getOriginalObject(), "ISO-8859-1"), false); } catch (Exception encodeE) { } } } catch (Exception ex) { } if (record != null) { listRecords.add(record); } else { ++errorRecords; } } if (errorRecords > 0) { log.warn("Total number of records that failed the conversion: " + errorRecords); } try { result.close(); s.close(); } catch (Exception closingException) { log.error(closingException.getMessage()); } } catch (Exception e) { log.error(e.getMessage(), e); } log.info("returning results"); return listRecords; }
From source file:com.silverpeas.tags.navigation.links.CachedLinkGeneratorImpl.java
/** * Suppresion des accents d'une chaine de caractres. * @param s/*from ww w. j ava 2 s. c om*/ * @return */ public String stripAccents(String s) { s = Normalizer.normalize(s, Normalizer.Form.NFD); s = s.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); return s; }
From source file:fr.paris.lutece.plugins.profanityfilter.service.ProfanityFilter.java
/**
 * Strips accents by decomposing the text with NFD and deleting the characters in the range
 * U+0300 to U+036F.
 *
 * @param source the text to process
 * @return the decomposed text without characters in that range
 */
public static String removeAccent(String source) {
    String decomposed = Normalizer.normalize(source, Normalizer.Form.NFD);
    return decomposed.replaceAll("[\u0300-\u036F]", "");
}