Example usage for java.text Normalizer normalize

Introduction

This page collects example usages of java.text.Normalizer.normalize.

Prototype

public static String normalize(CharSequence src, Form form)

Source Link

Document

Normalize a sequence of char values.

Usage

From source file:com.cardFetcher.Fetcher.ImageFetcher.java

/**
 * Finds the code of the card in {@code json} whose title matches {@code cardName}.
 *
 * Both names are decomposed (NFD), stripped of every non-ASCII character and
 * lower-cased before being compared; the JSON title is additionally passed
 * through {@code formatStr()}. A match on "Corporate Troubleshooter" always
 * yields the fixed code "01065" instead of the code stored in the JSON entry.
 *
 * @param json     array of card objects exposing "title" and "code" entries
 * @param cardName name of the card to look up
 * @return the code of the first matching card, or {@code null} when no title matches
 */
private String getCardCode(JSONArray json, String cardName) {
    for (int index = 0; index < json.length(); index++) {
        JSONObject card = (JSONObject) json.get(index);
        String title = card.getString("title");

        // Normalizer handles special characters (e.g. in CT or Shi.Kyu); formatStr() is still
        // applied to the JSON title because some names differ in other ways (Da gun, NeoTokyo, etc).
        String asciiTitle = Normalizer.normalize(title, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
        String candidate = formatStr(asciiTitle).toLowerCase();

        String wanted = Normalizer.normalize(cardName, Normalizer.Form.NFD)
                .replaceAll("[^\\p{ASCII}]", "").toLowerCase();

        if (!candidate.equals(wanted)) {
            continue;
        }

        return cardName.equals("Corporate Troubleshooter") ? "01065" : card.getString("code");
    }
    return null;
}

From source file:com.moviejukebox.plugin.AnimatorPlugin.java

/**
 * Retrieve the Animator id matching the specified movie name and year.
 *
 * The movie name is stripped of diacritics, URL-encoded as Cp1251 and then
 * used to query animator.ru (when animatorDiscovery is set) and allmults.org
 * (when multsDiscovery is set). In both cases a hit is only accepted when the
 * year extracted from the result equals the requested year.
 *
 * NOTE(review): the previous comment said this routine is based on a Google
 * request; the visible code queries the two sites directly.
 * NOTE(review): several string literals below contain "?" placeholders, which
 * presumably stand for non-ASCII text lost in this listing - confirm against
 * the original source before relying on them.
 *
 * @param movieName the movie name to search for
 * @param year the year a search result must carry to be accepted
 * @return "animatorId:allmultsId" (either part may be Movie.UNKNOWN), or
 *         Movie.UNKNOWN when neither id was found or an IOException occurred
 */
private String getAnimatorId(String movieName, String year) {
    try {
        String animatorId = Movie.UNKNOWN;
        String allmultsId = Movie.UNKNOWN;

        String sb = movieName;
        // Unaccenting letters: decompose so that marks become separate characters
        sb = Normalizer.normalize(sb, Normalizer.Form.NFD);
        // NOTE(review): the next two lines are identical and both delete char 774
        // (U+0306, combining breve); presumably each originally re-composed a
        // specific letter whose literal was lost - confirm.
        sb = sb.replaceAll("" + (char) 774, "");
        sb = sb.replaceAll("" + (char) 774, "");
        // Drop every remaining combining diacritical mark
        sb = sb.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

        // From here on sb holds the form-encoded query parameter, not the plain name
        sb = "text=" + URLEncoder.encode(sb, "Cp1251").replace(" ", "+");

        // Get ID from animator.ru
        if (animatorDiscovery) {
            String uri = "http://www.animator.ru/db/?p=search&SearchMask=1&" + sb;
            if (StringTools.isValidString(year)) {
                uri = uri + "&year0=" + year;
                uri = uri + "&year1=" + year;
            }
            String xml = httpClient.request(uri);
            // Only continue when the page contains the marker text
            // NOTE(review): the previous comment read "Checking for zero results" - confirm what the marker denotes
            if (xml.contains("[?? ]")) {
                // It's search results page, searching a link to the movie page
                int beginIndex;
                if (-1 != xml.indexOf("? ")) {
                    for (String tmp : HTMLTools.extractTags(xml, "? ", HTML_TD, HTML_HREF,
                            "<br><br>")) {
                        if (0 < tmp.indexOf("[?? ]")) {
                            beginIndex = tmp.indexOf(" .)");
                            if (beginIndex >= 0) {
                                // The four characters before the marker are taken as the year
                                // NOTE(review): throws StringIndexOutOfBoundsException if beginIndex < 4
                                String year2 = tmp.substring(beginIndex - 4, beginIndex);
                                if (year2.equals(year)) {
                                    beginIndex = tmp.indexOf("http://www.animator.ru/db/?p=show_film&fid=",
                                            beginIndex);
                                    if (beginIndex >= 0) {
                                        // 43 is the length of the URL prefix searched for above;
                                        // the id is the text up to the next space
                                        StringTokenizer st = new StringTokenizer(tmp.substring(beginIndex + 43),
                                                " ");
                                        animatorId = st.nextToken();
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // Get ID from allmults.org
        if (multsDiscovery) {
            URL url = new URL("http://allmults.org/search.php");
            URLConnection conn = url.openConnection(YamjHttpClientBuilder.getProxy());
            // Enable output so the query can be written as the request body
            conn.setDoOutput(true);

            OutputStreamWriter osWriter = null;
            StringBuilder xmlLines = new StringBuilder();

            try {
                osWriter = new OutputStreamWriter(conn.getOutputStream());
                osWriter.write(sb);
                osWriter.flush();

                // Read the whole response (decoded as cp1251) into xmlLines, without line breaks
                try (InputStreamReader inReader = new InputStreamReader(conn.getInputStream(), "cp1251");
                        BufferedReader bReader = new BufferedReader(inReader)) {
                    String line;
                    while ((line = bReader.readLine()) != null) {
                        xmlLines.append(line);
                    }
                }

                // NOTE(review): nothing was written since the previous flush, so this looks redundant
                osWriter.flush();
            } finally {
                if (osWriter != null) {
                    osWriter.close();
                }
            }

            if (xmlLines.indexOf("<div class=\"post\"") != -1) {
                for (String tmp : HTMLTools.extractTags(xmlLines.toString(),
                        "  ?  ", "<ul><li>", "<div class=\"entry\"",
                        "</div>")) {
                    int pos = tmp.indexOf("<img ");
                    if (pos != -1) {
                        int temp = tmp.indexOf(" alt=\"");
                        if (temp != -1) {
                            // alt text without its final character; its last four characters are taken as the year
                            String year2 = tmp.substring(temp + 6, tmp.indexOf("\"", temp + 6) - 1);
                            year2 = year2.substring(year2.length() - 4);
                            if (year2.equals(year)) {
                                temp = tmp.indexOf(" src=\"/images/multiki/");
                                if (temp != -1) {
                                    // 22 is the length of the src prefix; the id is the image name without ".jpg"
                                    allmultsId = tmp.substring(temp + 22, tmp.indexOf(".jpg", temp + 22));
                                    break;
                                }
                            }
                        }
                    }
                }
            }
        }

        // UNKNOWN only when both lookups failed; otherwise the two ids joined by ':'
        return (animatorId.equals(Movie.UNKNOWN) && allmultsId.equals(Movie.UNKNOWN)) ? Movie.UNKNOWN
                : animatorId + ":" + allmultsId;
    } catch (IOException error) {
        LOG.error("Failed retreiving Animator Id for movie : {}", movieName);
        LOG.error("Error : {}", error.getMessage());
        return Movie.UNKNOWN;
    }
}

From source file:vaeke.restcountries.v0.service.CountryV0Service.java

/**
 * Strips diacritics from the given string.
 *
 * The input is decomposed (NFD) so that accents become separate combining
 * characters, and every run of combining diacritical marks is then removed.
 *
 * @param string the text to process
 * @return the text without combining diacritical marks
 */
private String normalize(String string) {
    final String decomposed = Normalizer.normalize(string, Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java

/**
 * Generates a sort key for a given text. This key is useful in environments
 * where only basic Latin characters are reliably sorted (for example, a
 * RDBMS with unknown collation settings).
 *
 * @param text Text to process./*  w w  w .j a v  a  2 s . c  o  m*/
 * @param idempotent Whether the conversion should be idempotent. This is
 * guaranteed to be true:
 * <code>alphaSortable(s, true).equals(alphaSortable(alphaSortable(s, true), true)</code>,
 * while this is not necessarily true:
 * <code>alphaSortable(s, false).equals(alphaSortable(alphaSortable(s, false), false)</code>.
 * @return
 */
public static String alphaSortable(String text, boolean idempotent) {
    if (text == null) {
        return null;
    }

    if (idempotent && text.startsWith(MAGIC)) {
        return text;
    }

    String tmp = text.toLowerCase(Locale.ENGLISH);
    tmp = Normalizer.normalize(tmp, Normalizer.Form.NFKD);

    StringBuilder builder = new StringBuilder();
    if (idempotent) {
        builder.append(MAGIC);
    }

    boolean wasSpaceSeparator = false;
    for (int i = 0; i < tmp.length(); i++) {
        Character ch = tmp.charAt(i);
        if (!ArrayUtils.contains(INTERESTING_TYPES, Character.getType(ch))
                && !ArrayUtils.contains(INTERESTING_CHARACTERS, ch)) {
            continue;
        }

        String s;

        // TODO quick fix of mantis 3231
        if (isSpaceSeparator(ch)) {
            if (wasSpaceSeparator) {
                continue;
            }
            wasSpaceSeparator = true;
        } else {
            wasSpaceSeparator = false;
        }

        if (alphaSortableMapping.containsKey(ch)) {
            s = alphaSortableMapping.get(ch);
        } else if (lookup.containsKey(ch)) {
            s = lookup.get(ch);
        } else {
            s = ch.toString();
        }

        for (int j = 0; j < s.length(); j++) {
            Character c = s.charAt(j);
            // TODO Very ugly workaround of the problem described in 0002643
            if (ArrayUtils.contains(INTERESTING_CHARACTERS, c)) {
                builder.append(c);
            } else {
                builder.append(StringUtils.leftPad(Integer.toHexString(c.charValue()), 4, '0'));
            }
        }
    }

    return builder.toString();
}

From source file:se.vgregion.alfresco.repo.scripts.StorageContentGet.java

/**
 * Serves the published content of a stored document.
 *
 * Steps, in order:
 * <ol>
 * <li>Resolve the published node for the requested id (a specific version when
 * the "version" parameter is given, otherwise the latest); answer "not found"
 * when there is none.</li>
 * <li>Expose the first access right of the node, reduced to the characters
 * {@code a-z A-Z 0-9 _}, in the X-CONTENT-ACCESS-RIGHT header.</li>
 * <li>Unless "native" is requested, pick the rendition to deliver (PDF/A when
 * "pdfa" is requested, otherwise according to pdfDeliveryFormat) and set the
 * X-MAX-AGE header accordingly.</li>
 * <li>Set the XKEY header from the source document id.</li>
 * <li>Stream the rendition named by "streamId", or redirect to a URL carrying
 * the filename when no filename was given and the content is not an
 * attachment, or stream the content itself.</li>
 * </ol>
 *
 * @param req the incoming web script request
 * @param res the response that receives headers and content
 * @throws IOException declared by the overridden method
 */
@Override
public void execute(final WebScriptRequest req, final WebScriptResponse res) throws IOException {

    // create map of args
    final String[] names = req.getParameterNames();

    final Map<String, String> args = new HashMap<String, String>(names.length, 1.0f);

    for (final String name : names) {
        args.put(name, req.getParameter(name));
    }

    // create map of template vars
    final Map<String, String> templateVars = req.getServiceMatch().getTemplateVars();

    final String version = req.getParameter("version");

    final String id = parseId(templateVars);

    String targetFilename = templateVars.get("filename");

    final boolean nativ = StringUtils.isNotBlank(req.getParameter("native"))
            ? req.getParameter("native").equalsIgnoreCase("true")
            : false;

    // For testing purposes, let them specifically ask for the pdfa also
    final boolean pdfa = StringUtils.isNotBlank(req.getParameter("pdfa"))
            ? req.getParameter("pdfa").equalsIgnoreCase("true")
            : false;

    NodeRef nodeRef;

    if (StringUtils.isNotBlank(version)) {
        nodeRef = _storageService.getPublishedStorageVersion(id, version);
    } else {
        nodeRef = _storageService.getLatestPublishedStorageVersion(id);
    }

    if (nodeRef == null) {
        sendNotFoundStatus(req, res);
        return;
    }

    // must have a nodeRef for the filename later on, base it on the
    // original node, not an eventual published node
    NodeRef filenameNodeRef = nodeRef;

    // Inform Varnish about the access rights of this document.
    @SuppressWarnings("unchecked")
    List<String> accessRights = (List<String>) nodeService.getProperty(nodeRef, VgrModel.PROP_ACCESS_RIGHT);

    if (!CollectionUtils.isEmpty(accessRights)) {

        String accessRight = accessRights.get(0);

        // Remove accented characters: decompose, then keep only ASCII letters, digits and '_'.
        // The range must be A-Z; the former "A-z" also let '[', '\', ']', '^' and '`' through.
        accessRight = Normalizer.normalize(accessRight, Normalizer.Form.NFD).replaceAll("[^a-zA-Z0-9_]+", "");
        if (LOG.isDebugEnabled()) {
            LOG.debug("setting response header X-CONTENT-ACCESS-RIGHT to :" + accessRight);
        }
        res.addHeader("X-CONTENT-ACCESS-RIGHT", accessRight);
    }

    // stream content on node, or rendition of node
    final String streamId = req.getParameter("streamId");

    if (!nativ) {

        String mimetype = _serviceUtils.getMimetype(filenameNodeRef);

        if (pdfa) {
            // for testing purposes, return the pdfa rendition upon request, but don't push
            nodeRef = _storageService.getOrCreatePublishedRendition(filenameNodeRef,
                    StorageService.DELIVERY_FORMAT_PDFA, false);
            //if the delivery format should be pdf, and the native file already is a pdf, deliver the native file
        } else if (MimetypeMap.MIMETYPE_PDF.equals(mimetype)
                && StorageService.DELIVERY_FORMAT_PDF.equalsIgnoreCase(pdfDeliveryFormat)) {
            nodeRef = filenameNodeRef;
            // if the native file is not a pdf get the format specified by pdfDeliveryFormat (injected from alfresco-global.properties) 
        } else if (!MimetypeMap.MIMETYPE_PDF.equals(mimetype)) {
            nodeRef = _storageService.getOrCreatePublishedRendition(filenameNodeRef, pdfDeliveryFormat);
        }

        if (nodeRef != null && !filenameNodeRef.equals(nodeRef) && !pdfa) { // Nevermind the caching when user specifically requests pdfa with request param.
            if (LOG.isDebugEnabled()) {
                LOG.debug("Returned " + pdfDeliveryFormat
                        + "-rendition, setting custom response header X-MAX-AGE to " + cacheMaxAgeLong
                        + " seconds.");
            }

            // Instruct varnish to cache by setting a custom X-MAX-AGE to 24 hours if its not a native file
            res.addHeader("X-MAX-AGE", cacheMaxAgeLong);

        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("nodeRef: " + nodeRef);
                LOG.debug("filenameNodeRef: " + filenameNodeRef);
                LOG.debug("indicates no " + pdfDeliveryFormat
                        + "-rendition, setting custom response header X-MAX-AGE to 0");
            }

            // A short max age on natives
            res.addHeader("X-MAX-AGE", cacheMaxAgeShort);

        }
    } else {
        // Short age cache native docs
        if (LOG.isDebugEnabled()) {
            LOG.debug(
                    "Returns native doc, setting response header max-age to " + cacheMaxAgeShort + " seconds.");
        }
        res.addHeader("X-MAX-AGE", cacheMaxAgeShort);

    }

    // Set source document id as XKEY which is used as identifier of document in Varnish, 
    // supplied later on when we want to purge a document.
    String sourceDocumentId = (String) nodeService.getProperty(filenameNodeRef,
            VgrModel.PROP_SOURCE_DOCUMENTID);
    res.addHeader("XKEY", "alfresco/" + sourceDocumentId);

    // determine attachment
    final boolean attach = Boolean.valueOf(req.getParameter("a"));

    if (streamId != null && streamId.length() > 0) {
        // render content rendition
        streamRendition(req, res, nodeRef, streamId, attach);
    } else {
        // render content
        QName propertyQName = ContentModel.PROP_CONTENT;

        final String contentPart = templateVars.get("property");

        // A leading ';' introduces an explicit content property name
        if (contentPart.length() > 0 && contentPart.charAt(0) == ';') {
            if (contentPart.length() < 2) {
                throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, "Content property malformed");
            }

            final String propertyName = contentPart.substring(1);

            if (propertyName.length() > 0) {
                propertyQName = QName.createQName(propertyName, _namespaceService);
            }
        }

        final String filename = _storageService.extractFilename(filenameNodeRef, nodeRef, true);

        // if the targetFilename is blank and we're not going to attach the file,
        // then we redirect to the same URL but with the filename as parameter
        if (StringUtils.isBlank(targetFilename) && !attach) {
            String serverPath = req.getServerPath();
            String servicePath = req.getServicePath();
            String queryString = req.getQueryString();

            String url = serverPath + servicePath + "/"
                    + URLEncoder.encode(_storageService.extractFilename(filenameNodeRef, nodeRef, false)) + "?"
                    + queryString;

            if (LOG.isDebugEnabled()) {
                LOG.debug("Redirects to new location: " + url);
            }

            res.setHeader(WebScriptResponse.HEADER_LOCATION, url);
            res.setStatus(Status.STATUS_MOVED_TEMPORARILY);

            return;
        }

        // Stream the content
        streamContentLocal(req, res, nodeRef, attach, propertyQName, filename);
    }
}

From source file:NormSample.java

/**
 * Builds the user interface of the normalization sample.
 *
 * Fills the two lookup tables (normalization forms and sample strings), creates
 * one combo box for each, and adds a painting component that draws the selected
 * sample next to its normalized form. Changing either selection repaints that
 * component.
 */
public void init() {

    // normalization forms offered by the first combo box
    formValues.put("NFC", Normalizer.Form.NFC);
    formValues.put("NFD", Normalizer.Form.NFD);
    formValues.put("NFKC", Normalizer.Form.NFKC);
    formValues.put("NFKD", Normalizer.Form.NFKD);

    formComboBox = new JComboBox();
    for (Object formName : formValues.keySet()) {
        formComboBox.addItem((String) formName);
    }

    // sample strings offered by the second combo box:
    // acute accent, ligature, cedilla and half-width katakana
    templateValues.put("acute accent", "touch" + "\u00e9");
    templateValues.put("ligature", "a" + "\ufb03" + "ance");
    templateValues.put("cedilla", "fa" + "\u00e7" + "ade");
    templateValues.put("half-width katakana", "\uff81\uff6e\uff7a\uff9a\uff70\uff84");

    normalizationTemplate = new JComboBox();
    for (Object templateName : templateValues.keySet()) {
        normalizationTemplate.addItem((String) templateName);
    }

    // component that renders the original and the normalized string
    paintingComponent = new JComponent() {
        static final long serialVersionUID = -3725620407788489160L;

        // every size query answers with a fresh 550x200 dimension
        private Dimension fixedSize() {
            return new Dimension(550, 200);
        }

        public Dimension getSize() {
            return fixedSize();
        }

        public Dimension getPreferredSize() {
            return fixedSize();
        }

        public Dimension getMinimumSize() {
            return fixedSize();
        }

        public void paint(Graphics g) {
            Graphics2D canvas = (Graphics2D) g;

            // captions
            canvas.setFont(new Font("Serif", Font.PLAIN, 20));
            canvas.setColor(Color.BLACK);
            canvas.drawString("Original string:", 100, 80);
            canvas.drawString("Normalized string:", 100, 120);

            // values are drawn in a larger, bold font
            canvas.setFont(new Font("Serif", Font.BOLD, 24));

            // the sample currently selected in the template combo box
            String sample = templateValues.get(normalizationTemplate.getSelectedItem());
            canvas.drawString(sample, 320, 80);

            // the same sample normalized with the currently selected form
            java.text.Normalizer.Form selectedForm = formValues.get(formComboBox.getSelectedItem());
            String normalizedSample = Normalizer.normalize(sample, selectedForm);
            canvas.drawString(normalizedSample, 320, 120);
        }
    };

    setLayout(new BoxLayout(this, BoxLayout.Y_AXIS));
    add(paintingComponent);

    JPanel controls = new JPanel();
    controls.setLayout(new BoxLayout(controls, BoxLayout.X_AXIS));
    controls.add(new Label("Normalization Form: "));
    controls.add(formComboBox);
    controls.add(new Label("Normalization Template:"));
    controls.add(normalizationTemplate);
    add(controls);

    // any change of selection simply triggers a repaint of the result area
    ActionListener repaintOnSelection = new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            paintingComponent.repaint();
        }
    };
    formComboBox.addActionListener(repaintOnSelection);
    normalizationTemplate.addActionListener(repaintOnSelection);
}

From source file:annis.gui.flatquerybuilder.SpanBox.java

/**
 * Removes accents from the given text.
 *
 * The text is decomposed (NFD) and every run of combining diacritical marks
 * is deleted, leaving the base characters.
 *
 * @param text the text to process, may be {@code null}
 * @return the text without combining diacritical marks, or {@code null} for a {@code null} text
 */
public static String removeAccents(String text) {
    if (text == null) {
        return null;
    }
    String decomposed = Normalizer.normalize(text, Normalizer.Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

From source file:de.qaware.chronix.importer.csv.FileImporter.java

/**
 * Reads the given file / folder and calls the bi consumers with the extracted points.
 *
 * Every file is expected to hold a header line followed by data lines, both split
 * with the configured csv delimiter: the first column is the date, each further
 * column is one time series named by the header. Files ending in "gz" are read
 * through a GZIP stream. For every imported time series a stats line is written
 * to the metrics file.
 *
 * @param points    receives, per time series, the dates of its first and last point after sorting
 * @param folder    a single file, or a folder that is searched recursively for "gz" and "csv" files
 * @param databases consumers that are handed the points and attributes of each time series
 * @return pair of (number of time series, number of points) counted during the import
 */
public Pair<Integer, Integer> importPoints(Map<Attributes, Pair<Instant, Instant>> points, File folder,
        BiConsumer<List<ImportPoint>, Attributes>... databases) {

    final AtomicInteger pointCounter = new AtomicInteger(0);
    final AtomicInteger tsCounter = new AtomicInteger(0);
    final File metricsFile = new File(METRICS_FILE_PATH);

    LOGGER.info("Writing imported metrics to {}", metricsFile);
    LOGGER.info("Import supports csv files as well as gz compressed csv files.");

    // try-with-resources: the metrics writer is flushed and closed once all files are processed
    // (forEach on the parallel stream only returns after every file has been handled).
    try (FileWriter metricsFileWriter = new FileWriter(metricsFile)) {

        Collection<File> files = new ArrayList<>();
        if (folder.isFile()) {
            files.add(folder);
        } else {
            files.addAll(FileUtils.listFiles(folder, new String[] { "gz", "csv" }, true));
        }

        AtomicInteger counter = new AtomicInteger(0);

        files.parallelStream().forEach(file -> {
            // formatters are created per file, so they are never shared between threads
            SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
            NumberFormat nf = DecimalFormat.getInstance(numberLocal);

            InputStream inputStream = null;
            BufferedReader reader = null;
            try {
                inputStream = new FileInputStream(file);

                if (file.getName().endsWith("gz")) {
                    inputStream = new GZIPInputStream(inputStream);
                }
                reader = new BufferedReader(new InputStreamReader(inputStream));

                //Read the first line
                String headerLine = reader.readLine();

                if (headerLine == null || headerLine.isEmpty()) {
                    boolean deleted = deleteFile(file, inputStream, reader);
                    LOGGER.debug("File is empty {}. File removed {}", file.getName(), deleted);
                    return;
                }

                //Extract the attributes from the file name
                //E.g. first_second_third_attribute.csv
                String[] fileNameMetaData = file.getName().split("_");

                String[] metrics = headerLine.split(csvDelimiter);

                Map<Integer, Attributes> attributesPerTimeSeries = new HashMap<>(metrics.length);

                // column 0 is the date column, so the metrics start at index 1
                for (int i = 1; i < metrics.length; i++) {
                    String metric = metrics[i];
                    // Decompose first so that accented letters keep their base letter,
                    // then drop whatever is still outside ASCII.
                    String metricOnlyAscii = Normalizer.normalize(metric, Normalizer.Form.NFD)
                            .replaceAll("[^\\x00-\\x7F]", "");
                    Attributes attributes = new Attributes(metricOnlyAscii, fileNameMetaData);

                    //Check if meta data is completely set
                    if (isEmpty(attributes)) {
                        boolean deleted = deleteFile(file, inputStream, reader);
                        LOGGER.info("Attributes contains empty values {}. File {} deleted {}", attributes,
                                file.getName(), deleted);
                        continue;
                    }

                    if (attributes.getMetric().equals(".*")) {
                        boolean deleted = deleteFile(file, inputStream, reader);
                        LOGGER.info("Attributes metric{}. File {} deleted {}", attributes.getMetric(),
                                file.getName(), deleted);
                        continue;
                    }
                    attributesPerTimeSeries.put(i, attributes);
                    tsCounter.incrementAndGet();

                }

                Map<Integer, List<ImportPoint>> dataPoints = new HashMap<>();

                String line;
                while ((line = reader.readLine()) != null) {
                    String[] splits = line.split(csvDelimiter);
                    String date = splits[0];

                    // the date column is an ISO instant, a formatted date or epoch milliseconds
                    Instant dateObject;
                    if (instantDate) {
                        dateObject = Instant.parse(date);
                    } else if (sdfDate) {
                        dateObject = sdf.parse(date).toInstant();
                    } else {
                        dateObject = Instant.ofEpochMilli(Long.valueOf(date));
                    }

                    for (int column = 1; column < splits.length; column++) {

                        String value = splits[column];
                        double numericValue = nf.parse(value).doubleValue();

                        ImportPoint point = new ImportPoint(dateObject, numericValue);

                        if (!dataPoints.containsKey(column)) {
                            dataPoints.put(column, new ArrayList<>());
                        }
                        dataPoints.get(column).add(point);
                        pointCounter.incrementAndGet();
                    }

                }

                dataPoints.values().forEach(Collections::sort);

                // the file is fully read; release it before handing the points on
                IOUtils.closeQuietly(reader);
                IOUtils.closeQuietly(inputStream);

                dataPoints.forEach((key, importPoints) -> {
                    for (BiConsumer<List<ImportPoint>, Attributes> database : databases) {
                        database.accept(importPoints, attributesPerTimeSeries.get(key));
                    }
                    points.put(attributesPerTimeSeries.get(key), Pair.of(importPoints.get(0).getDate(),
                            importPoints.get(importPoints.size() - 1).getDate()));
                    //write the stats to the file
                    Instant start = importPoints.get(0).getDate();
                    Instant end = importPoints.get(importPoints.size() - 1).getDate();

                    try {
                        writeStatsLine(metricsFileWriter, attributesPerTimeSeries.get(key), start, end);
                    } catch (IOException e) {
                        LOGGER.error("Could not write stats line", e);
                    }
                    LOGGER.info("{} of {} time series imported", counter.incrementAndGet(), tsCounter.get());
                });

            } catch (Exception e) {
                LOGGER.info("Exception while reading points.", e);
            } finally {
                //close all streams
                IOUtils.closeQuietly(reader);
                IOUtils.closeQuietly(inputStream);
            }

        });
    } catch (Exception e) {
        // keep the cause in the log instead of dropping it
        LOGGER.error("Exception occurred during reading points.", e);
    }
    return Pair.of(tsCounter.get(), pointCounter.get());
}

From source file:org.voyanttools.trombone.model.Keywords.java

/**
 * Re-orders the keywords by their NFD-normalized form, ignoring case.
 *
 * The keywords are copied into a list, the list is sorted, and the keyword
 * collection is then emptied and refilled from the sorted list.
 */
public void sort() {
    final Comparator<String> byNormalizedForm = new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            final String leftDecomposed = Normalizer.normalize(left, Normalizer.Form.NFD);
            final String rightDecomposed = Normalizer.normalize(right, Normalizer.Form.NFD);
            return leftDecomposed.compareToIgnoreCase(rightDecomposed);
        }
    };

    final List<String> ordered = new ArrayList<String>(keywords);
    Collections.sort(ordered, byNormalizedForm);

    keywords.clear();
    keywords.addAll(ordered);
}

From source file:com.lloydtorres.stately.helpers.SparkleHelper.java

/**
 * Reduces a given String to its ASCII characters.
 *
 * The text is decomposed (NFD) first, so an accented letter contributes its
 * base letter; every character above U+007F is then left out.
 * Source: http://stackoverflow.com/a/15191508
 *
 * @param target the text to convert
 * @return the decomposed text restricted to characters up to U+007F
 */
public static String normalizeToAscii(String target) {
    final StringBuilder ascii = new StringBuilder(target.length());
    final String decomposed = Normalizer.normalize(target, Normalizer.Form.NFD);
    for (int i = 0; i < decomposed.length(); i++) {
        final char current = decomposed.charAt(i);
        if (current < 0x80) {
            ascii.append(current);
        }
    }
    return ascii.toString();
}