Usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:com.cardFetcher.Fetcher.ImageFetcher.java
private String getCardCode(JSONArray json, String cardName) { JSONObject curr = null;// w ww. j a v a2 s . co m for (int x = 0; x < json.length(); x++) { curr = (JSONObject) json.get(x); String jsonName = curr.getString("title"); //Normalizer is for special characters like in CT or Shi.Kyu (maybe more, but never ran into issues), still use formatStr() since there's always issues with names (Da gun, NeoTokyo, etc) if (formatStr(Normalizer.normalize(jsonName, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "")) .toLowerCase().equals(Normalizer.normalize(cardName, Normalizer.Form.NFD) .replaceAll("[^\\p{ASCII}]", "").toLowerCase())) { if (cardName.equals("Corporate Troubleshooter")) return "01065"; return curr.getString("code"); } } return null; }
From source file:com.moviejukebox.plugin.AnimatorPlugin.java
/**
 * Looks up the animator.ru and allmults.org identifiers for a movie.
 *
 * The movie name is decomposed with NFD, stripped of combining diacritical marks and
 * URL-encoded as Cp1251 to build the {@code text=} search parameter. When
 * {@code animatorDiscovery} is set, animator.ru is searched (restricted to {@code year}
 * when it is a valid string) and the token following {@code show_film&fid=} is taken
 * from the first result whose four characters before the " .)" marker equal
 * {@code year}. When {@code multsDiscovery} is set, the same parameter is written to
 * allmults.org/search.php and the id is cut from the {@code /images/multiki/...jpg}
 * path of the first entry whose alt attribute, minus its final character, ends with
 * {@code year}.
 *
 * NOTE(review): several string literals below contain "?" placeholders, one literal is
 * broken across a line, and the two replaceAll calls on (char) 774 are identical. This
 * looks like non-ASCII text lost to an encoding problem; confirm against the upstream
 * source before relying on these literals.
 *
 * @param movieName title to search for
 * @param year release year used to select the matching search result
 * @return {@code Movie.UNKNOWN} when neither site yields an id, or when an IOException
 *         occurs (it is logged); otherwise {@code animatorId + ":" + allmultsId}
 */
private String getAnimatorId(String movieName, String year) { try { String animatorId = Movie.UNKNOWN; String allmultsId = Movie.UNKNOWN; String sb = movieName; // Unaccenting letters sb = Normalizer.normalize(sb, Normalizer.Form.NFD); // Return simple letters '' & '' sb = sb.replaceAll("" + (char) 774, ""); sb = sb.replaceAll("" + (char) 774, ""); sb = sb.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); sb = "text=" + URLEncoder.encode(sb, "Cp1251").replace(" ", "+"); // Get ID from animator.ru if (animatorDiscovery) { String uri = "http://www.animator.ru/db/?p=search&SearchMask=1&" + sb; if (StringTools.isValidString(year)) { uri = uri + "&year0=" + year; uri = uri + "&year1=" + year; } String xml = httpClient.request(uri); // Checking for zero results if (xml.contains("[?? ]")) { // It's search results page, searching a link to the movie page int beginIndex; if (-1 != xml.indexOf("? ")) { for (String tmp : HTMLTools.extractTags(xml, "? ", HTML_TD, HTML_HREF, "<br><br>")) { if (0 < tmp.indexOf("[?? 
]")) { beginIndex = tmp.indexOf(" .)"); if (beginIndex >= 0) { String year2 = tmp.substring(beginIndex - 4, beginIndex); if (year2.equals(year)) { beginIndex = tmp.indexOf("http://www.animator.ru/db/?p=show_film&fid=", beginIndex); if (beginIndex >= 0) { StringTokenizer st = new StringTokenizer(tmp.substring(beginIndex + 43), " "); animatorId = st.nextToken(); break; } } } } } } } } // Get ID from allmults.org if (multsDiscovery) { URL url = new URL("http://allmults.org/search.php"); URLConnection conn = url.openConnection(YamjHttpClientBuilder.getProxy()); conn.setDoOutput(true); OutputStreamWriter osWriter = null; StringBuilder xmlLines = new StringBuilder(); try { osWriter = new OutputStreamWriter(conn.getOutputStream()); osWriter.write(sb); osWriter.flush(); try (InputStreamReader inReader = new InputStreamReader(conn.getInputStream(), "cp1251"); BufferedReader bReader = new BufferedReader(inReader)) { String line; while ((line = bReader.readLine()) != null) { xmlLines.append(line); } } osWriter.flush(); } finally { if (osWriter != null) { osWriter.close(); } } if (xmlLines.indexOf("<div class=\"post\"") != -1) { for (String tmp : HTMLTools.extractTags(xmlLines.toString(), " ? ", "<ul><li>", "<div class=\"entry\"", "</div>")) { int pos = tmp.indexOf("<img "); if (pos != -1) { int temp = tmp.indexOf(" alt=\""); if (temp != -1) { String year2 = tmp.substring(temp + 6, tmp.indexOf("\"", temp + 6) - 1); year2 = year2.substring(year2.length() - 4); if (year2.equals(year)) { temp = tmp.indexOf(" src=\"/images/multiki/"); if (temp != -1) { allmultsId = tmp.substring(temp + 22, tmp.indexOf(".jpg", temp + 22)); break; } } } } } } } return (animatorId.equals(Movie.UNKNOWN) && allmultsId.equals(Movie.UNKNOWN)) ? Movie.UNKNOWN : animatorId + ":" + allmultsId; } catch (IOException error) { LOG.error("Failed retreiving Animator Id for movie : {}", movieName); LOG.error("Error : {}", error.getMessage()); return Movie.UNKNOWN; } }
From source file:vaeke.restcountries.v0.service.CountryV0Service.java
/**
 * Removes diacritics from the given text.
 *
 * The text is decomposed with NFD so that accents become separate combining marks,
 * and every run of characters from the Combining Diacritical Marks block is deleted.
 *
 * @param string text to strip; must not be {@code null}
 * @return the text without combining diacritical marks
 */
private String normalize(String string) {
    final String decomposed = Normalizer.normalize(string, Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java
/** * Generates a sort key for a given text. This key is useful in environments * where only basic Latin characters are reliably sorted (for example, a * RDBMS with unknown collation settings). * * @param text Text to process./* w w w .j a v a 2 s . c o m*/ * @param idempotent Whether the conversion should be idempotent. This is * guaranteed to be true: * <code>alphaSortable(s, true).equals(alphaSortable(alphaSortable(s, true), true)</code>, * while this is not necessarily true: * <code>alphaSortable(s, false).equals(alphaSortable(alphaSortable(s, false), false)</code>. * @return */ public static String alphaSortable(String text, boolean idempotent) { if (text == null) { return null; } if (idempotent && text.startsWith(MAGIC)) { return text; } String tmp = text.toLowerCase(Locale.ENGLISH); tmp = Normalizer.normalize(tmp, Normalizer.Form.NFKD); StringBuilder builder = new StringBuilder(); if (idempotent) { builder.append(MAGIC); } boolean wasSpaceSeparator = false; for (int i = 0; i < tmp.length(); i++) { Character ch = tmp.charAt(i); if (!ArrayUtils.contains(INTERESTING_TYPES, Character.getType(ch)) && !ArrayUtils.contains(INTERESTING_CHARACTERS, ch)) { continue; } String s; // TODO quick fix of mantis 3231 if (isSpaceSeparator(ch)) { if (wasSpaceSeparator) { continue; } wasSpaceSeparator = true; } else { wasSpaceSeparator = false; } if (alphaSortableMapping.containsKey(ch)) { s = alphaSortableMapping.get(ch); } else if (lookup.containsKey(ch)) { s = lookup.get(ch); } else { s = ch.toString(); } for (int j = 0; j < s.length(); j++) { Character c = s.charAt(j); // TODO Very ugly workaround of the problem described in 0002643 if (ArrayUtils.contains(INTERESTING_CHARACTERS, c)) { builder.append(c); } else { builder.append(StringUtils.leftPad(Integer.toHexString(c.charValue()), 4, '0')); } } } return builder.toString(); }
From source file:se.vgregion.alfresco.repo.scripts.StorageContentGet.java
@Override public void execute(final WebScriptRequest req, final WebScriptResponse res) throws IOException { // create map of args final String[] names = req.getParameterNames(); final Map<String, String> args = new HashMap<String, String>(names.length, 1.0f); for (final String name : names) { args.put(name, req.getParameter(name)); }/*from w ww . j a v a2 s.co m*/ // create map of template vars final Map<String, String> templateVars = req.getServiceMatch().getTemplateVars(); final String version = req.getParameter("version"); final String id = parseId(templateVars); String targetFilename = templateVars.get("filename"); final boolean nativ = StringUtils.isNotBlank(req.getParameter("native")) ? req.getParameter("native").equalsIgnoreCase("true") : false; // For testing purposes, let them specifically ask for the pdfa also final boolean pdfa = StringUtils.isNotBlank(req.getParameter("pdfa")) ? req.getParameter("pdfa").equalsIgnoreCase("true") : false; NodeRef nodeRef; if (StringUtils.isNotBlank(version)) { nodeRef = _storageService.getPublishedStorageVersion(id, version); } else { nodeRef = _storageService.getLatestPublishedStorageVersion(id); } if (nodeRef == null) { sendNotFoundStatus(req, res); return; } // must have a nodeRef for the filename later on, base it on the // original node, not an eventual published node NodeRef filenameNodeRef = nodeRef; // Inform Varnish about the access rights of this document. 
@SuppressWarnings("unchecked") List<String> accessRights = (List<String>) nodeService.getProperty(nodeRef, VgrModel.PROP_ACCESS_RIGHT); if (!CollectionUtils.isEmpty(accessRights)) { String accessRight = accessRights.get(0); // Remove accented characters accessRight = Normalizer.normalize(accessRight, Normalizer.Form.NFD).replaceAll("[^a-zA-z0-9_]+", ""); if (LOG.isDebugEnabled()) { LOG.debug("setting response header X-CONTENT-ACCESS-RIGHT to :" + accessRight); } res.addHeader("X-CONTENT-ACCESS-RIGHT", accessRight); } // stream content on node, or rendition of node final String streamId = req.getParameter("streamId"); if (!nativ) { String mimetype = _serviceUtils.getMimetype(filenameNodeRef); if (pdfa) { // for testing purposes, return the pdfa rendition upon request, but don't push nodeRef = _storageService.getOrCreatePublishedRendition(filenameNodeRef, StorageService.DELIVERY_FORMAT_PDFA, false); //if the delivery format should be pdf, and the native file already is a pdf, deliver the native file } else if (MimetypeMap.MIMETYPE_PDF.equals(mimetype) && StorageService.DELIVERY_FORMAT_PDF.equalsIgnoreCase(pdfDeliveryFormat)) { nodeRef = filenameNodeRef; // if the native file is not a pdf get the format specified by pdfDeliveryFormat (injected from alfresco-global.properties) } else if (!MimetypeMap.MIMETYPE_PDF.equals(mimetype)) { nodeRef = _storageService.getOrCreatePublishedRendition(filenameNodeRef, pdfDeliveryFormat); } if (nodeRef != null && !filenameNodeRef.equals(nodeRef) && !pdfa) { // Nevermind the caching when user specifically requests pdfa with request param. 
if (LOG.isDebugEnabled()) { LOG.debug("Returned " + pdfDeliveryFormat + "-rendition, setting custom response header X-MAX-AGE to " + cacheMaxAgeLong + " seconds."); } // Instruct varnish to cache by setting a custom X-MAX-AGE to 24 hours if its not a native file res.addHeader("X-MAX-AGE", cacheMaxAgeLong); } else { if (LOG.isDebugEnabled()) { LOG.debug("nodeRef: " + nodeRef); LOG.debug("filenameNodeRef: " + filenameNodeRef); LOG.debug("indicates no " + pdfDeliveryFormat + "-rendition, setting custom response header X-MAX-AGE to 0"); } // A short max age on natives res.addHeader("X-MAX-AGE", cacheMaxAgeShort); } } else { // Short age cache native docs if (LOG.isDebugEnabled()) { LOG.debug( "Returns native doc, setting response header max-age to " + cacheMaxAgeShort + " seconds."); } res.addHeader("X-MAX-AGE", cacheMaxAgeShort); } // Set source document id as XKEY which is used as identifier of document in Varnish, // supplied later on when we want to purge a document. String sourceDocumentId = (String) nodeService.getProperty(filenameNodeRef, VgrModel.PROP_SOURCE_DOCUMENTID); res.addHeader("XKEY", "alfresco/" + sourceDocumentId); // determine attachment final boolean attach = Boolean.valueOf(req.getParameter("a")); if (streamId != null && streamId.length() > 0) { // render content rendition streamRendition(req, res, nodeRef, streamId, attach); } else { // render content QName propertyQName = ContentModel.PROP_CONTENT; final String contentPart = templateVars.get("property"); if (contentPart.length() > 0 && contentPart.charAt(0) == ';') { if (contentPart.length() < 2) { throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, "Content property malformed"); } final String propertyName = contentPart.substring(1); if (propertyName.length() > 0) { propertyQName = QName.createQName(propertyName, _namespaceService); } } final String filename = _storageService.extractFilename(filenameNodeRef, nodeRef, true); // if the targetFilename is blank and we're not going to 
attach the file, // then we redirect to the same URL but with the filename as parameter if (StringUtils.isBlank(targetFilename) && !attach) { String serverPath = req.getServerPath(); String servicePath = req.getServicePath(); String queryString = req.getQueryString(); String url = serverPath + servicePath + "/" + URLEncoder.encode(_storageService.extractFilename(filenameNodeRef, nodeRef, false)) + "?" + queryString; if (LOG.isDebugEnabled()) { LOG.debug("Redirects to new location: " + url); } res.setHeader(WebScriptResponse.HEADER_LOCATION, url); res.setStatus(Status.STATUS_MOVED_TEMPORARILY); return; } // Stream the content streamContentLocal(req, res, nodeRef, attach, propertyQName, filename); } }
From source file:NormSample.java
public void init() { // preparing values for the normalization forms ComboBox formValues.put("NFC", Normalizer.Form.NFC); formValues.put("NFD", Normalizer.Form.NFD); formValues.put("NFKC", Normalizer.Form.NFKC); formValues.put("NFKD", Normalizer.Form.NFKD); formComboBox = new JComboBox(); for (Iterator it = formValues.keySet().iterator(); it.hasNext();) { formComboBox.addItem((String) it.next()); }/*from w ww . j a v a 2 s .c o m*/ // preparing samples for normalization // text with the acute accent symbol templateValues.put("acute accent", "touch" + "\u00e9"); // text with ligature templateValues.put("ligature", "a" + "\ufb03" + "ance"); // text with the cedilla templateValues.put("cedilla", "fa" + "\u00e7" + "ade"); // text with half-width katakana templateValues.put("half-width katakana", "\uff81\uff6e\uff7a\uff9a\uff70\uff84"); normalizationTemplate = new JComboBox(); for (Iterator it = templateValues.keySet().iterator(); it.hasNext();) { normalizationTemplate.addItem((String) it.next()); } // defining a component to output normalization results paintingComponent = new JComponent() { static final long serialVersionUID = -3725620407788489160L; public Dimension getSize() { return new Dimension(550, 200); } public Dimension getPreferredSize() { return new Dimension(550, 200); } public Dimension getMinimumSize() { return new Dimension(550, 200); } public void paint(Graphics g) { Graphics2D g2 = (Graphics2D) g; g2.setFont(new Font("Serif", Font.PLAIN, 20)); g2.setColor(Color.BLACK); g2.drawString("Original string:", 100, 80); g2.drawString("Normalized string:", 100, 120); g2.setFont(new Font("Serif", Font.BOLD, 24)); // output of the original sample selected from the ComboBox String original_string = templateValues.get(normalizationTemplate.getSelectedItem()); g2.drawString(original_string, 320, 80); // normalization and output of the normalized string String normalized_string; java.text.Normalizer.Form currentForm = formValues.get(formComboBox.getSelectedItem()); 
normalized_string = Normalizer.normalize(original_string, currentForm); g2.drawString(normalized_string, 320, 120); } }; setLayout(new BoxLayout(this, BoxLayout.Y_AXIS)); add(paintingComponent); JPanel controls = new JPanel(); controls.setLayout(new BoxLayout(controls, BoxLayout.X_AXIS)); controls.add(new Label("Normalization Form: ")); controls.add(formComboBox); controls.add(new Label("Normalization Template:")); controls.add(normalizationTemplate); add(controls); formComboBox.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { paintingComponent.repaint(); } }); normalizationTemplate.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { paintingComponent.repaint(); } }); }
From source file:annis.gui.flatquerybuilder.SpanBox.java
/**
 * Strips accents from a text.
 *
 * The text is decomposed with NFD and every run of characters from the Combining
 * Diacritical Marks block is removed, leaving the base characters.
 *
 * @param text text to process, may be {@code null}
 * @return the text without combining diacritical marks, or {@code null} when
 *         {@code text} is {@code null}
 */
public static String removeAccents(String text) {
    if (text == null) {
        return null;
    }
    String decomposed = Normalizer.normalize(text, Normalizer.Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
From source file:de.qaware.chronix.importer.csv.FileImporter.java
/** * Reads the given file / folder and calls the bi consumer with the extracted points * * @param points// www.j av a 2s . co m * @param folder * @param databases * @return */ public Pair<Integer, Integer> importPoints(Map<Attributes, Pair<Instant, Instant>> points, File folder, BiConsumer<List<ImportPoint>, Attributes>... databases) { final AtomicInteger pointCounter = new AtomicInteger(0); final AtomicInteger tsCounter = new AtomicInteger(0); final File metricsFile = new File(METRICS_FILE_PATH); LOGGER.info("Writing imported metrics to {}", metricsFile); LOGGER.info("Import supports csv files as well as gz compressed csv files."); try { final FileWriter metricsFileWriter = new FileWriter(metricsFile); Collection<File> files = new ArrayList<>(); if (folder.isFile()) { files.add(folder); } else { files.addAll(FileUtils.listFiles(folder, new String[] { "gz", "csv" }, true)); } AtomicInteger counter = new AtomicInteger(0); files.parallelStream().forEach(file -> { SimpleDateFormat sdf = new SimpleDateFormat(dateFormat); NumberFormat nf = DecimalFormat.getInstance(numberLocal); InputStream inputStream = null; BufferedReader reader = null; try { inputStream = new FileInputStream(file); if (file.getName().endsWith("gz")) { inputStream = new GZIPInputStream(inputStream); } reader = new BufferedReader(new InputStreamReader(inputStream)); //Read the first line String headerLine = reader.readLine(); if (headerLine == null || headerLine.isEmpty()) { boolean deleted = deleteFile(file, inputStream, reader); LOGGER.debug("File is empty {}. File {} removed {}", file.getName(), deleted); return; } //Extract the attributes from the file name //E.g. 
first_second_third_attribute.csv String[] fileNameMetaData = file.getName().split("_"); String[] metrics = headerLine.split(csvDelimiter); Map<Integer, Attributes> attributesPerTimeSeries = new HashMap<>(metrics.length); for (int i = 1; i < metrics.length; i++) { String metric = metrics[i]; String metricOnlyAscii = Normalizer.normalize(metric, Normalizer.Form.NFD); metricOnlyAscii = metric.replaceAll("[^\\x00-\\x7F]", ""); Attributes attributes = new Attributes(metricOnlyAscii, fileNameMetaData); //Check if meta data is completely set if (isEmpty(attributes)) { boolean deleted = deleteFile(file, inputStream, reader); LOGGER.info("Attributes contains empty values {}. File {} deleted {}", attributes, file.getName(), deleted); continue; } if (attributes.getMetric().equals(".*")) { boolean deleted = deleteFile(file, inputStream, reader); LOGGER.info("Attributes metric{}. File {} deleted {}", attributes.getMetric(), file.getName(), deleted); continue; } attributesPerTimeSeries.put(i, attributes); tsCounter.incrementAndGet(); } Map<Integer, List<ImportPoint>> dataPoints = new HashMap<>(); String line; while ((line = reader.readLine()) != null) { String[] splits = line.split(csvDelimiter); String date = splits[0]; Instant dateObject; if (instantDate) { dateObject = Instant.parse(date); } else if (sdfDate) { dateObject = sdf.parse(date).toInstant(); } else { dateObject = Instant.ofEpochMilli(Long.valueOf(date)); } for (int column = 1; column < splits.length; column++) { String value = splits[column]; double numericValue = nf.parse(value).doubleValue(); ImportPoint point = new ImportPoint(dateObject, numericValue); if (!dataPoints.containsKey(column)) { dataPoints.put(column, new ArrayList<>()); } dataPoints.get(column).add(point); pointCounter.incrementAndGet(); } } dataPoints.values().forEach(Collections::sort); IOUtils.closeQuietly(reader); IOUtils.closeQuietly(inputStream); dataPoints.forEach((key, importPoints) -> { for (BiConsumer<List<ImportPoint>, Attributes> 
database : databases) { database.accept(importPoints, attributesPerTimeSeries.get(key)); } points.put(attributesPerTimeSeries.get(key), Pair.of(importPoints.get(0).getDate(), importPoints.get(importPoints.size() - 1).getDate())); //write the stats to the file Instant start = importPoints.get(0).getDate(); Instant end = importPoints.get(importPoints.size() - 1).getDate(); try { writeStatsLine(metricsFileWriter, attributesPerTimeSeries.get(key), start, end); } catch (IOException e) { LOGGER.error("Could not write stats line", e); } LOGGER.info("{} of {} time series imported", counter.incrementAndGet(), tsCounter.get()); }); } catch (Exception e) { LOGGER.info("Exception while reading points.", e); } finally { //close all streams IOUtils.closeQuietly(reader); IOUtils.closeQuietly(inputStream); } }); } catch (Exception e) { LOGGER.error("Exception occurred during reading points."); } return Pair.of(tsCounter.get(), pointCounter.get()); }
From source file:org.voyanttools.trombone.model.Keywords.java
/**
 * Sorts the keywords in place, comparing their NFD-decomposed forms without regard
 * to case.
 */
public void sort() {
    Comparator<String> byDecomposedForm = new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            String decomposedLeft = Normalizer.normalize(left, Normalizer.Form.NFD);
            String decomposedRight = Normalizer.normalize(right, Normalizer.Form.NFD);
            return decomposedLeft.compareToIgnoreCase(decomposedRight);
        }
    };

    // sort a copy, then replace the contents of the keyword collection with it
    List<String> ordered = new ArrayList<String>(keywords);
    Collections.sort(ordered, byDecomposedForm);
    keywords.clear();
    keywords.addAll(ordered);
}
From source file:com.lloydtorres.stately.helpers.SparkleHelper.java
/**
 * Reduces a String to its ASCII characters.
 *
 * The text is decomposed with NFD, so accented letters split into a base letter plus
 * combining marks, and every character above U+007F is then dropped.
 * Source: http://stackoverflow.com/a/15191508
 *
 * @param target text to convert; must not be {@code null}
 * @return the characters of the decomposed text that are within the ASCII range
 */
public static String normalizeToAscii(String target) {
    final int initialCapacity = target.length();
    final String decomposed = Normalizer.normalize(target, Normalizer.Form.NFD);

    final StringBuilder ascii = new StringBuilder(initialCapacity);
    for (int i = 0; i < decomposed.length(); i++) {
        final char current = decomposed.charAt(i);
        if (current > 0x7F) {
            continue;
        }
        ascii.append(current);
    }
    return ascii.toString();
}