Example usage for java.text Normalizer normalize

List of usage examples for java.text Normalizer normalize

Introduction

On this page you can find example usages of java.text.Normalizer.normalize.

Prototype

public static String normalize(CharSequence src, Form form) 

Source Link

Document

Normalizes a sequence of char values according to the specified normalization form.

Usage

From source file:cn.org.once.cstack.model.Snapshot.java

/**
 * Sets the application name after reducing it to a lowercase ASCII alphanumeric form.
 *
 * <p>The value is lowercased, decomposed to NFD so that accents become separate combining
 * marks, stripped of those marks, and finally stripped of every character outside
 * {@code [a-z0-9]}.
 *
 * @param applicationName the raw application name; must not be {@code null}
 */
public void setApplicationName(String applicationName) {
    // Locale.ROOT keeps the lowercasing independent of the JVM default locale. With a Turkish
    // default locale, "I" would otherwise map to dotless i (U+0131), which has no decomposition
    // and would be silently dropped by the final [^a-z0-9] filter.
    applicationName = applicationName.toLowerCase(java.util.Locale.ROOT);
    applicationName = Normalizer.normalize(applicationName, Normalizer.Form.NFD);
    applicationName = applicationName.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
    this.applicationName = applicationName.replaceAll("[^a-z0-9]", "");
}

From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java

/**
 * Derives a page name from the specified URL.
 *
 * @param url the URL of the page
 * @return the last non-empty "/"-separated segment of the URL text, with combining
 *         diacritical marks removed
 */
@Override
public String getName(URL url) {
    // the page name is the last segment produced by splitting the URL text on "/"
    String[] segments = url.toString().split("/");
    String pageName = segments[segments.length - 1];

    // decompose accented characters, then drop the combining marks
    String decomposed = Normalizer.normalize(pageName, Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:fr.insalyon.creatis.vip.datamanager.server.rpc.FileUploadServiceImpl.java

/**
 * Handles a multipart file upload posted by a user stored in the session.
 *
 * <p>The request is ignored unless a session user is present and the content is multipart.
 * The form fields {@code path}, {@code file} and {@code target} are read from the request. The
 * uploaded file is written under the upload root directory using a sanitized file name
 * (base name only, spaces replaced by underscores, diacritics removed). When {@code path} is
 * not {@code "local"} the file is additionally handed to the GRIDA pool client. Finally an
 * HTML page is returned whose script calls {@code parent.<target>('<operationID>')}.
 *
 * @param request the upload request
 * @param response the response receiving the HTML callback page
 * @throws ServletException declared by the servlet contract
 * @throws IOException if writing the response fails
 */
@Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {

    User user = (User) request.getSession().getAttribute(CoreConstants.SESSION_USER);
    if (user == null || !ServletFileUpload.isMultipartContent(request)) {
        return;
    }

    FileItemFactory factory = new DiskFileItemFactory();
    ServletFileUpload upload = new ServletFileUpload(factory);
    try {
        List<?> items = upload.parseRequest(request);
        String fileName = null;
        FileItem fileItem = null;
        String path = null;
        String target = "uploadComplete";
        String operationID = "no-id";

        for (Object element : items) {
            FileItem item = (FileItem) element;
            String fieldName = item.getFieldName();

            if ("path".equals(fieldName)) {
                path = item.getString();
            } else if ("file".equals(fieldName)) {
                fileName = item.getName();
                fileItem = item;
            } else if ("target".equals(fieldName)) {
                target = item.getString();
            }
        }

        // The callback name is request-controlled and ends up inside a <script> block, so
        // anything that is not a plain dotted identifier is replaced by the default callback.
        if (!isSafeCallbackName(target)) {
            logger.error("(" + user.getEmail() + ") Rejected unsafe upload callback name.");
            target = "uploadComplete";
        }

        if (fileName != null && !fileName.equals("")) {
            if (path == null) {
                // Without a 'path' field the destination is unknown; report the failure through
                // the default "no-id" operation id instead of failing with a NullPointerException.
                logger.error("(" + user.getEmail() + ") Upload rejected: missing 'path' field.");
            } else {
                boolean local = path.equals("local");
                String rootDirectory = DataManagerUtil.getUploadRootDirectory(local);
                // keep only the base name so that client-side directories are discarded
                fileName = new File(fileName).getName().trim().replaceAll(" ", "_");
                fileName = Normalizer.normalize(fileName, Normalizer.Form.NFD)
                        .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
                File uploadedFile = new File(rootDirectory + fileName);

                try {
                    fileItem.write(uploadedFile);
                    // NOTE(review): the sanitized file name is echoed unescaped into a response
                    // later declared as text/html; kept because clients may rely on it - confirm.
                    response.getWriter().write(fileName);

                    if (!local) {
                        // GRIDA Pool Client
                        logger.info("(" + user.getEmail() + ") Uploading '" + uploadedFile.getAbsolutePath()
                                + "' to '" + path + "'.");
                        GRIDAPoolClient client = CoreUtil.getGRIDAPoolClient();
                        operationID = client.uploadFile(uploadedFile.getAbsolutePath(),
                                DataManagerUtil.parseBaseDir(user, path), user.getEmail());

                    } else {
                        operationID = fileName;
                        logger.info(
                                "(" + user.getEmail() + ") Uploaded '" + uploadedFile.getAbsolutePath() + "'.");
                    }
                } catch (Exception ex) {
                    // best effort: the client is still notified, with the "no-id" operation id
                    logger.error(ex);
                }
            }
        }

        response.setContentType("text/html");
        response.setHeader("Pragma", "No-cache");
        response.setDateHeader("Expires", 0);
        response.setHeader("Cache-Control", "no-cache");
        PrintWriter out = response.getWriter();
        out.println("<html>");
        out.println("<body>");
        out.println("<script type=\"text/javascript\">");
        // operationID may be derived from the uploaded file name, so it is escaped before being
        // embedded in the single-quoted JavaScript string.
        out.println("if (parent." + target + ") parent." + target + "('"
                + escapeForJsString(operationID) + "');");
        out.println("</script>");
        out.println("</body>");
        out.println("</html>");
        out.flush();

    } catch (FileUploadException ex) {
        logger.error(ex);
    }
}

/** Tells whether a callback name is a plain dotted JavaScript identifier path. */
private static boolean isSafeCallbackName(String name) {
    return name != null
            && name.matches("[A-Za-z_$][A-Za-z0-9_$]*(\\.[A-Za-z_$][A-Za-z0-9_$]*)*");
}

/** Escapes a value for use inside a single-quoted JavaScript string within an HTML script block. */
private static String escapeForJsString(String value) {
    String text = String.valueOf(value);
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
        case '\\':
            escaped.append("\\\\");
            break;
        case '\'':
            escaped.append("\\'");
            break;
        case '"':
            escaped.append("\\\"");
            break;
        case '\n':
            escaped.append("\\n");
            break;
        case '\r':
            escaped.append("\\r");
            break;
        case '<':
            // prevents a literal "</script>" from terminating the script block
            escaped.append("\\x3C");
            break;
        case '>':
            escaped.append("\\x3E");
            break;
        case '&':
            escaped.append("\\x26");
            break;
        default:
            escaped.append(c);
            break;
        }
    }
    return escaped.toString();
}

From source file:org.haedus.datatypes.phonetic.FeatureModel.java

/**
 * Finds the symbol of the feature map whose features are closest to the given feature array.
 *
 * <p>Every map entry is scored with {@code getDifferenceValue}; the first entry with the
 * strictly smallest score wins. When the best score is greater than zero, the result of
 * {@code getBestDiacritic} is appended to the symbol. The combined string is returned in NFC
 * form.
 *
 * @param featureArray the features to find a symbol for
 * @return the NFC-normalized best symbol, possibly followed by a diacritic
 */
public String getBestSymbol(List<Double> featureArray) {

    String closestSymbol = "";
    List<Double> closestFeatures = new ArrayList<Double>();
    double smallestDifference = Double.MAX_VALUE;

    for (Map.Entry<String, List<Double>> candidate : featureMap.entrySet()) {
        List<Double> candidateFeatures = candidate.getValue();
        double candidateDifference = getDifferenceValue(featureArray, candidateFeatures);

        // strict comparison: on ties the earlier entry is kept
        if (candidateDifference < smallestDifference) {
            closestSymbol = candidate.getKey();
            smallestDifference = candidateDifference;
            closestFeatures = candidateFeatures;
        }
    }

    // a diacritic is only looked up when the match is not exact
    String diacritic = (smallestDifference > 0.0)
            ? getBestDiacritic(featureArray, closestFeatures)
            : "";

    String combined = closestSymbol + diacritic;
    return Normalizer.normalize(combined, Normalizer.Form.NFC);
}

From source file:pl.edu.icm.cermine.metadata.model.DocumentAffiliation.java

/**
 * Returns the text of the first token whose label equals {@code AffiliationLabel.INST}.
 *
 * @return the NFC-normalized text of that token, or {@code null} when no token carries the label
 */
public String getOrganization() {
    for (Token<AffiliationLabel> candidate : tokens) {
        boolean matchesLabel = candidate.getLabel().equals(AffiliationLabel.INST);
        if (!matchesLabel) {
            continue;
        }
        String tokenText = candidate.getText();
        return Normalizer.normalize(tokenText, Normalizer.Form.NFC);
    }
    return null;
}

From source file:org.sonar.batch.scm.DefaultBlameOutput.java

/**
 * Decomposes the input to NFD and deletes everything matched by {@code ACCENT_CODES}.
 *
 * @param inputString the text to process
 * @return the decomposed text with all {@code ACCENT_CODES} matches removed
 */
private static String removeAccents(String inputString) {
    return ACCENT_CODES
            .matcher(Normalizer.normalize(inputString, Normalizer.Form.NFD))
            .replaceAll("");
}

From source file:com.continusec.client.ObjectHash.java

/**
 * Produces the hash bytes for a string value.
 *
 * <p>When {@code r} is non-null and {@code s} starts with it, the remainder of {@code s} is
 * hex-decoded and returned as is. Otherwise the result is the digest obtained from
 * {@code DigestUtils.getSha256Digest()} over the byte {@code 'u'} followed by the UTF-8 bytes
 * of the NFC-normalized string.
 *
 * @param s the string to hash
 * @param r the prefix marking an already-hashed value, or {@code null} for none
 * @return the hash bytes
 * @throws ContinusecException if the hex payload cannot be decoded or the encoding is unsupported
 */
private static final byte[] hashString(String s, String r) throws ContinusecException {
    boolean carriesHexPayload = (r != null) && s.startsWith(r);

    if (!carriesHexPayload) {
        try {
            MessageDigest digest = DigestUtils.getSha256Digest();
            // type marker byte prepended to the string content
            digest.update((byte) 'u');
            String normalized = Normalizer.normalize(s, Normalizer.Form.NFC);
            digest.update(normalized.getBytes("UTF8"));
            return digest.digest();
        } catch (UnsupportedEncodingException e) {
            throw new InvalidObjectException(e);
        }
    }

    // everything after the prefix is expected to be hexadecimal digits
    char[] hexDigits = s.substring(r.length()).toCharArray();
    try {
        return Hex.decodeHex(hexDigits);
    } catch (DecoderException e) {
        throw new InvalidObjectException(e);
    }
}

From source file:biblivre3.z3950.Z3950Client.java

/**
 * Runs a search against the given Z39.50 server and converts the results to records.
 *
 * <p>The shared factory is configured from {@code server}, a prefix query is built from the
 * search type and the diacritic-free search value, and every returned result is converted to
 * a {@code Record}. Results that cannot be converted are skipped and counted; the count is
 * logged as a warning. Any exception raised while searching is logged and the records
 * collected so far are returned.
 *
 * @param server the server to query (URL, port, collection and charset are read from it)
 * @param search the search type and value
 * @return the converted records; empty when nothing was found or the search failed
 */
public List<Record> doSearch(final Z3950ServerDTO server, final Z3950SearchDTO search) {
    List<Record> listRecords = new ArrayList<Record>();

    factory.setHost(server.getUrl());
    factory.setPort(server.getPort());
    factory.setCharsetEncoding("UTF-8");

    factory.setApplicationContext(z3950Context);
    factory.setDefaultRecordSyntax("usmarc");
    factory.setDefaultElementSetName("F");

    factory.setDoCharsetNeg(true);

    factory.getRecordArchetypes().put("Default", "usmarc::F");
    factory.getRecordArchetypes().put("FullDisplay", "usmarc::F");
    factory.getRecordArchetypes().put("BriefDisplay", "usmarc::B");

    // NOTE(review): the search value is placed between double quotes without escaping; a value
    // containing '"' may yield a malformed query - confirm that callers sanitize it.
    final String qry = QUERY_PREFIX + search.getType() + " \"" + TextUtils.removeDiacriticals(search.getValue())
            + "\"";

    IRQuery query = new IRQuery();
    query.collections = new Vector();
    query.collections.add(server.getCollection());
    query.query = new org.jzkit.search.util.QueryModel.PrefixString.PrefixString(qry);

    Searchable s = null;
    IRResultSet result = null;
    try {
        s = factory.newSearchable();
        s.setApplicationContext(z3950Context);
        result = s.evaluate(query);

        // Wait without timeout until result set is complete or failure
        result.waitForStatus(IRResultSetStatus.COMPLETE | IRResultSetStatus.FAILURE, 0);
        if (result.getStatus() == IRResultSetStatus.FAILURE) {
            log.error("IRResultSetStatus == FAILURE");
        }
        if (result.getFragmentCount() == 0) {
            // the finally block below still releases the result set and the searchable
            return listRecords;
        }

        String encoding = server.getCharset();
        AnselToUnicode atu = new AnselToUnicode();

        Enumeration results = new ReadAheadEnumeration(result, new ArchetypeRecordFormatSpecification("Default"));
        int errorRecords = 0;
        while (results.hasMoreElements()) {
            iso2709 rawRecord = (iso2709) results.nextElement();

            // Each result gets a fresh record: a failed conversion must not re-add the record
            // converted in the previous iteration.
            Record record = convertToRecord(rawRecord, encoding, atu);

            if (record != null) {
                listRecords.add(record);
            } else {
                ++errorRecords;
            }
        }
        if (errorRecords > 0) {
            log.warn("Total number of records that failed the conversion: " + errorRecords);
        }
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    } finally {
        // Release both resources on every path (normal end, early return, exception); each is
        // closed independently so that a failure on the first does not skip the second.
        if (result != null) {
            try {
                result.close();
            } catch (Exception closingException) {
                log.error(closingException.getMessage());
            }
        }
        if (s != null) {
            try {
                s.close();
            } catch (Exception closingException) {
                log.error(closingException.getMessage());
            }
        }
    }
    log.info("returning results");
    return listRecords;
}

/** Converts one raw ISO 2709 result to a record, trying several decodings; null if all fail. */
private Record convertToRecord(iso2709 rawRecord, String encoding, AnselToUnicode atu) {
    Record record = null;
    try {
        byte[] rawBytes = (byte[]) rawRecord.getOriginalObject();

        String iso;
        if (encoding.equals("MARC-8")) {
            // MARC-8 bytes are read one byte per char, converted by AnselToUnicode, then composed
            iso = Normalizer.normalize(atu.convert(new String(rawBytes, "ISO-8859-1")),
                    Normalizer.Form.NFC);
        } else {
            iso = new String(rawBytes, encoding);
        }

        // Attempt 1: text in the server charset, parsed "as ISO".
        try {
            record = MarcUtils.iso2709ToRecordAsIso(iso, false);
        } catch (Exception ignored) {
            // deliberate best effort: fall through to the next strategy
        }

        // Attempt 2: text in the server charset, regular parsing.
        if (record == null) {
            try {
                record = MarcUtils.iso2709ToRecord(iso, false);
            } catch (Exception ignored) {
                // deliberate best effort: fall through to the next strategy
            }
        }

        // Attempt 3: raw bytes read as ISO-8859-1, parsed "as ISO".
        if (record == null) {
            try {
                record = MarcUtils.iso2709ToRecordAsIso(new String(rawBytes, "ISO-8859-1"), false);
            } catch (Exception ignored) {
                // deliberate best effort: fall through to the next strategy
            }
        }

        // Attempt 4: raw bytes read as ISO-8859-1, regular parsing.
        if (record == null) {
            try {
                record = MarcUtils.iso2709ToRecord(new String(rawBytes, "ISO-8859-1"), false);
            } catch (Exception ignored) {
                // last strategy failed: the caller counts this result as an error
            }
        }
    } catch (Exception ignored) {
        // decoding the raw bytes failed: the caller counts this result as an error
    }
    return record;
}

From source file:com.silverpeas.tags.navigation.links.CachedLinkGeneratorImpl.java

/**
 * Suppresion des accents d'une chaine de caractres.
 * @param s/*from ww w. j ava  2 s. c om*/
 * @return
 */
public String stripAccents(String s) {
    s = Normalizer.normalize(s, Normalizer.Form.NFD);
    s = s.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    return s;
}

From source file:fr.paris.lutece.plugins.profanityfilter.service.ProfanityFilter.java

/**
 * Removes accents by decomposing the text to NFD and deleting the characters in the range
 * U+0300 to U+036F.
 *
 * @param source the text to process
 * @return the text without those combining characters
 */
public static String removeAccent(String source) {
    String decomposed = Normalizer.normalize(source, Normalizer.Form.NFD);
    return decomposed.replaceAll("[\u0300-\u036F]", "");
}