List of usage examples for java.text.Normalizer.normalize
public static String normalize(CharSequence src, Form form)
From source file:com.moviejukebox.plugin.KinopoiskPlugin.java
/** * Retrieve Kinopoisk matching the specified movie name and year. This routine is base on a Google request. * * @param movieName//from w ww . j a v a 2 s . c om * @param year * @param season * @return */ public String getKinopoiskId(String movieName, String year, int season) { try { String kinopoiskId; String sb = movieName; // Unaccenting letters sb = Normalizer.normalize(sb, Normalizer.Form.NFD); sb = sb.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); sb = "&m_act[find]=" + URLEncoder.encode(sb, "UTF-8").replace(" ", "+"); if (season != -1) { sb = sb + "&m_act[content_find]=serial"; } else if (StringTools.isValidString(year)) { if (year.indexOf('-') > -1) { String[] years = year.split("-"); sb = sb + "&m_act[from_year]=" + years[0]; sb = sb + "&m_act[to_year]=" + years[1]; } else { sb = sb + "&m_act[year]=" + year; } } sb = "http://kinopoisk.ru/index.php?level=7&from=forma&result=adv&m_act[from]=forma&m_act[what]=content" + sb; String xml = httpClient.request(sb, CHARSET); // Checking for zero results if (!xml.contains("class=\"search_results\"")) { // Checking direct movie page if (xml.contains("class=\"moviename-big\"")) { return HTMLTools.extractTag(xml, "id_film = ", ";"); } return Movie.UNKNOWN; } // Checking if we got the movie page directly int beginIndex = xml.indexOf("id_film = "); if (beginIndex == -1) { // It's search results page, searching a link to the movie page beginIndex = xml.indexOf("class=\"search_results\""); if (beginIndex == -1) { return Movie.UNKNOWN; } beginIndex = xml.indexOf("/level/1/film/", beginIndex); if (beginIndex == -1) { return Movie.UNKNOWN; } StringTokenizer st = new StringTokenizer(xml.substring(beginIndex + 14), "/\""); kinopoiskId = st.nextToken(); } else { // It's the movie page StringTokenizer st = new StringTokenizer(xml.substring(beginIndex + 10), ";"); kinopoiskId = st.nextToken(); } if (StringTools.isValidString(kinopoiskId) && StringUtils.isNumeric(kinopoiskId)) { return kinopoiskId; } } catch (IOException 
error) { LOG.error("Failed retreiving Kinopoisk Id for movie : {}", movieName); LOG.error("Error : {}", error.getMessage()); } return Movie.UNKNOWN; }
From source file:com.otaupdater.utils.Utils.java
public static String sanitizeName(String name) { if (name == null) return ""; name = Normalizer.normalize(name, Normalizer.Form.NFD); name = name.trim();// w ww . j a v a2s. c om name = name.replaceAll("[^\\p{ASCII}]", ""); name = name.replaceAll("[ _-]+", "_"); name = name.replaceAll("(^_|_$)", ""); name = name.toLowerCase(Locale.US); return name; }
From source file:org.jets3t.service.utils.FileComparer.java
/** * Normalize string into "Normalization Form Canonical Decomposition" (NFD). * * References:/*w w w . j a v a2 s. c o m*/ * http://stackoverflow.com/questions/3610013 * http://en.wikipedia.org/wiki/Unicode_equivalence * * @param str * @return string normalized into NFC form. */ protected String normalizeUnicode(String str) { Normalizer.Form form = Normalizer.Form.NFD; if (!Normalizer.isNormalized(str, form)) { return Normalizer.normalize(str, form); } return str; }
From source file:com.viettel.util.StringUtils.java
/** Matches runs of combining marks left behind by canonical decomposition. */
private static final Pattern COMBINING_DIACRITICAL_MARKS =
        Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

/**
 * Removes accents from a string.
 *
 * <p>The text is decomposed (NFD) and the combining marks are dropped. The letters
 * D-with-stroke (U+0110, U+0111) have no canonical decomposition, so they are mapped to
 * {@code D} and {@code d} explicitly.
 *
 * @param s the text to strip; must not be {@code null}
 * @return the text without diacritical marks
 */
public static String unAccent(String s) {
    String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    return COMBINING_DIACRITICAL_MARKS.matcher(decomposed).replaceAll("")
            // Literal character replacement: no regex involved.
            .replace('\u0110', 'D')
            .replace('\u0111', 'd');
}
From source file:org.nuxeo.ecm.platform.filemanager.TestFileManagerService.java
@Test public void testCreateExistingBlobWithNonNFCNormalizedFilename() throws Exception { // Create doc from NFC normalized filename String fileName = " .rtf"; String nfcNormalizedFileName = Normalizer.normalize(fileName, Normalizer.Form.NFC); Blob blob = Blobs.createBlob("Test content", "text/rtf", null, nfcNormalizedFileName); service.createDocumentFromBlob(coreSession, blob, workspace.getPathAsString(), true, nfcNormalizedFileName); assertNotNull(FileManagerUtils.getExistingDocByFileName(coreSession, workspace.getPathAsString(), nfcNormalizedFileName));/*from www .ja v a 2 s.c om*/ // Check existing doc with non NFC (NFD) normalized filename String nfdNormalizedFileName = Normalizer.normalize(fileName, Normalizer.Form.NFD); assertNotNull(FileManagerUtils.getExistingDocByFileName(coreSession, workspace.getPathAsString(), nfdNormalizedFileName)); }
From source file:br.com.pprv.web.control.beans.tecnica.TecnicaBean.java
/** * metodo utilizado para fazer o upload dos arquivos. * * @param uploadedFile/*from w w w . j av a 2 s . c o m*/ * @return */ public boolean doUpload(UploadedFile uploadedFile) { boolean result = false; SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss"); String fileExtension = uploadedFile.getFileName(); String nmDescTemplate = Normalizer.normalize(tbequipamentoSelected.getNmequipamenta(), Normalizer.Form.NFD); nmDescTemplate = nmDescTemplate.replaceAll("[^\\p{ASCII}]", "").replace("\"", ""); String fileName = nmDescTemplate.toUpperCase() + "_" + sdf.format(new Date()) + fileExtension.substring(fileExtension.lastIndexOf('.'), fileExtension.length()); File file = new File(CAMINHO, fileName); final TbarquivosEquipamentoPK tbarquivosEquipamentoPK = new TbarquivosEquipamentoPK(); tbarquivosEquipamentoPK.setIdequipamento(tbequipamentoSelected.getIdequipamento()); tbarquivosEquipamentoPK.setTmdataupload(new Date()); final TbarquivosEquipamento tbarquivosEquipamento = new TbarquivosEquipamento(); tbarquivosEquipamento.setIdusuario(Shareds.getUser()); tbarquivosEquipamento.setNmarquivo(fileName); tbarquivosEquipamento.setTbarquivosEquipamentoPK(tbarquivosEquipamentoPK); tbarquivosEquipamento.setTbequipamento(tbequipamentoSelected); try { try (FileOutputStream fileOutput = new FileOutputStream(file)) { fileOutput.write(IOUtils.toByteArray(uploadedFile.getInputstream())); fileOutput.flush(); fileOutput.close(); } result = true; if (arquivosEquipamentoLogic.createTbarquivosEquipamento(tbarquivosEquipamento)) { tbequipamentoSelected.getTbarquivosEquipamentoList().add(tbarquivosEquipamento); } } catch (FileNotFoundException ex) { AbstractFacesContextUtils.addMessageError("Falha ao encontrar o arquivo."); ex.printStackTrace(System.err); } catch (IOException ex) { AbstractFacesContextUtils.addMessageError("Falha ao abrir o arquivo."); ex.printStackTrace(System.err); } return result; }
From source file:org.structr.core.function.Functions.java
/**
 * Turns arbitrary input into a lower-case, ASCII-only, hyphen-separated string.
 *
 * <p>Brackets, dots, question marks, exclamation marks, commas and angle brackets are removed.
 * Quotes, slashes, backslashes, pipes, plus signs, underscores, backticks and dashes act as word
 * separators. Whitespace is collapsed and words are joined with a single hyphen, for example
 * {@code "Hello, World!"} becomes {@code "hello-world"}.
 *
 * @param input the value to clean; its {@code toString()} is used, may be {@code null}
 * @return the cleaned string; the empty string when {@code input} is {@code null}
 */
public static String cleanString(final Object input) {

    if (input == null) {
        return "";
    }

    String normalized = Normalizer.normalize(input.toString(), Normalizer.Form.NFD)
            // Characters that are dropped without leaving a separator.
            .replaceAll("[<>.?(){}\\[\\]!,]", "")
            // Characters that separate words. The en/em dash (written as escapes so the source
            // encoding cannot corrupt them) must be handled here, before non-ASCII is stripped.
            .replaceAll("['+/\\\\|_`\u2013\u2014\\-]", " ");

    // Locale.ROOT: a Turkish default locale would lower-case 'I' to a non-ASCII dotless i,
    // which the ASCII filter below would then delete.
    String result = StringUtils.normalizeSpace(normalized.toLowerCase(java.util.Locale.ROOT));

    result = result.replaceAll("[^\\p{ASCII}]", "")
            .replaceAll("\\p{P}", "-")
            .replaceAll("\\-(\\s+\\-)+", "-");
    result = result.replaceAll(" ", "-");

    return result;
}
From source file:org.geosdi.geoplatform.gui.server.service.impl.GPCatalogFinderService.java
/**
 * Replaces the accents of a string with apostrophes.
 *
 * <p>The input is decomposed (NFD) and every run of combining diacritical marks is replaced by a
 * single {@code '} placed after the base letter.
 *
 * @param str the text to convert; must not be {@code null}
 * @return the text with each run of combining marks replaced by an apostrophe
 */
public String deAccent(String str) {
    return Normalizer.normalize(str, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "'");
}
From source file:com.cloudbees.hudson.plugins.folder.ChildNameGeneratorTest.java
/**
 * Asserts that the child with the given ideal name exists under its encoded name, lives in the
 * mangled root directory, has no directory under the alternative Unicode normalization of the
 * name, and carries a name file holding the encoded name.
 *
 * @param instance the folder whose child is checked
 * @param idealName the un-encoded name of the child
 * @throws IOException if the name file cannot be read
 */
private void checkChild(ComputedFolderImpl instance, String idealName) throws IOException {
    final String expectedEncodedName = encode(idealName);
    final FreeStyleProject child = instance.getItem(expectedEncodedName);

    assertThat("We have an item for name " + idealName, child, notNullValue());
    assertThat("The root directory of the item for name " + idealName + " is mangled",
            child.getRootDir().getName(), is(mangle(idealName)));

    // Pick whichever normalization form differs from the name as given: NFD first, else NFC.
    final String decomposed = Normalizer.normalize(idealName, Normalizer.Form.NFD);
    final String otherForm = idealName.equals(decomposed)
            ? Normalizer.normalize(idealName, Normalizer.Form.NFC)
            : decomposed;

    if (!idealName.equals(otherForm)) {
        final File otherRootDir = instance.getRootDirFor(otherForm);
        assertThat("Alternative normalized form: " + otherRootDir + " does not exist",
                otherRootDir.isDirectory(), is(false));
    }

    final File childNameFile = new File(child.getRootDir(), ChildNameGenerator.CHILD_NAME_FILE);
    assertThat("We have the " + ChildNameGenerator.CHILD_NAME_FILE + " for the item for name " + idealName,
            childNameFile.isFile(), is(true));

    final String storedName = FileUtils.readFileToString(childNameFile);
    assertThat("The " + ChildNameGenerator.CHILD_NAME_FILE + " for the item for name " + idealName
            + " contains the encoded name", storedName, is(expectedEncodedName));
}
From source file:com.github.bfour.fpliteraturecollector.service.DefaultLiteratureService.java
/** * Determines whether a pair of two Literatures maybe is a duplicate pair. * Employs comparative measures that might lead to false positives. * //from w w w . j a v a2 s .c o m * @param litA * @param litB * @return */ private boolean isProbableDuplicate(Literature litA, Literature litB) { if (isCertainDuplicate(litA, litB)) return true; // 1 character different for every 14 characters if (StringUtils.getLevenshteinDistance( Normalizer.normalize(litA.getTitle(), Normalizer.Form.NFD).toLowerCase(), Normalizer.normalize(litB.getTitle(), Normalizer.Form.NFD) .toLowerCase()) <= (litA.getTitle().length() / 14)) return true; if (litA.getISBN() != null && litB.getISBN() != null && litA.getISBN().equals(litB.getISBN())) return true; return false; }