List of usage examples for org.apache.commons.lang3 StringUtils getLevenshteinDistance
public static int getLevenshteinDistance(CharSequence s, CharSequence t)
From source file:org.starnub.utilities.strings.StringUtilities.java
/**
 * Computes how far apart two words are, ignoring case, as a percentage of the longer word.
 *
 * <p>Despite the method name, the returned value is a <em>difference</em> score: the Levenshtein
 * distance between the lower-cased inputs, divided by the length of the longer input, multiplied
 * by 100 and rounded to the nearest whole number. Words that are equal ignoring case yield
 * {@code 0}; two empty strings also yield {@code 0} because {@code Math.round(NaN)} is {@code 0}.
 *
 * @param s  String string to be compared against
 * @param s2 String string to compare
 * @return double the rounded percentage by which the two words differ
 * @throws ArithmeticException declared in the signature; the floating-point division used here
 *                             does not raise it
 */
public static double similarityCalculationCaseInsensitive(String s, String s2) throws ArithmeticException {
    // Locale.ROOT keeps the case folding independent of the JVM default locale
    // (e.g. under a Turkish locale "I" would otherwise lower-case to a dotless i).
    String lowerS = s.toLowerCase(java.util.Locale.ROOT);
    String lowerS2 = s2.toLowerCase(java.util.Locale.ROOT);
    int levDist = StringUtils.getLevenshteinDistance(lowerS, lowerS2);
    int longest = Math.max(lowerS.length(), lowerS2.length());
    return Math.round(((double) levDist / (double) longest) * 100);
}
From source file:org.starnub.utilities.strings.StringUtilities.java
/**
 * Computes how far apart two words are as a percentage of the longer word.
 *
 * <p>Despite the method name, the returned value is a <em>difference</em> score: the Levenshtein
 * distance between the inputs, divided by the length of the longer input, multiplied by 100 and
 * rounded to the nearest whole number. Identical words therefore yield {@code 0}.
 *
 * @param s  String string to be compared against
 * @param s2 String string to compare
 * @return double the rounded percentage by which the two words differ
 * @throws ArithmeticException declared in the signature; the floating-point division used here
 *                             does not raise it
 */
public static double similarityCalculation(String s, String s2) throws ArithmeticException {
    int editDistance = StringUtils.getLevenshteinDistance(s, s2);
    // The longer of the two inputs is the denominator.
    int longerLength = Math.max(s.length(), s2.length());
    double ratio = (double) editDistance / (double) longerLength;
    return Math.round(ratio * 100);
}
From source file:org.xlrnet.metadict.core.aggregation.LevenstheinRelevanceOrderStrategy.java
/**
 * Scores a result entry by how close its source or target general form is to the query.
 *
 * <p>Both forms are compared to the query case-insensitively using the Levenshtein distance, and
 * the smaller distance {@code d} is turned into {@code 1.0 - d / (1 + d)}. A distance of zero
 * therefore scores {@code 1.0}, and the score shrinks toward zero as the distance grows. The
 * target form is only considered when both the target and its general form are non-null.
 *
 * @param entry       the result entry to score
 * @param queryString the query the entry is compared against
 * @return the relevance score derived from the smaller of the two distances
 */
double calculateEntryScore(@NotNull ResultEntry entry, @NotNull String queryString) {
    // Lower-case with a fixed locale so the score does not depend on the JVM default locale,
    // and normalize the query once instead of once per comparison.
    String sourceForm = entry.getSource().getGeneralForm().toLowerCase(java.util.Locale.ROOT);
    String normalizedQuery = queryString.toLowerCase(java.util.Locale.ROOT);

    int levenstheinInput = StringUtils.getLevenshteinDistance(sourceForm, normalizedQuery);

    // MAX_VALUE makes the missing target lose the min() below.
    int levenstheinOutput = Integer.MAX_VALUE;
    if (entry.getTarget() != null && entry.getTarget().getGeneralForm() != null) {
        levenstheinOutput = StringUtils.getLevenshteinDistance(
                entry.getTarget().getGeneralForm().toLowerCase(java.util.Locale.ROOT), normalizedQuery);
    }

    int levensthein = Integer.min(levenstheinInput, levenstheinOutput);
    return 1.0 - ((double) levensthein / (1 + (double) levensthein));
}
From source file:org.xlrnet.metadict.impl.aggregation.LevenstheinRelevanceOrderStrategy.java
/**
 * Scores a result entry by how close its input or output general form is to the query.
 *
 * <p>Both forms are compared to the query case-insensitively using the Levenshtein distance, and
 * the smaller distance {@code d} is turned into {@code 1.0 - d / (1 + d)}. A distance of zero
 * therefore scores {@code 1.0}, and the score shrinks toward zero as the distance grows. The
 * output form is only considered when both the output and its general form are non-null.
 *
 * @param entry       the result entry to score
 * @param queryString the query the entry is compared against
 * @return the relevance score derived from the smaller of the two distances
 */
double calculateEntryScore(@NotNull ResultEntry entry, @NotNull String queryString) {
    // Lower-case with a fixed locale so the score does not depend on the JVM default locale,
    // and normalize the query once instead of once per comparison.
    String inputForm = entry.getInput().getGeneralForm().toLowerCase(java.util.Locale.ROOT);
    String normalizedQuery = queryString.toLowerCase(java.util.Locale.ROOT);

    int levenstheinInput = StringUtils.getLevenshteinDistance(inputForm, normalizedQuery);

    // MAX_VALUE makes the missing output lose the min() below.
    int levenstheinOutput = Integer.MAX_VALUE;
    if (entry.getOutput() != null && entry.getOutput().getGeneralForm() != null) {
        levenstheinOutput = StringUtils.getLevenshteinDistance(
                entry.getOutput().getGeneralForm().toLowerCase(java.util.Locale.ROOT), normalizedQuery);
    }

    int levensthein = Integer.min(levenstheinInput, levenstheinOutput);
    return 1.0 - ((double) levensthein / (1 + (double) levensthein));
}
From source file:org.yamj.core.service.metadata.online.TheMovieDbApiWrapper.java
/**
 * Looks up the TMDb id of a person by name.
 *
 * <p>The search results are scanned in order. The first result whose name equals {@code name}
 * ignoring case wins immediately; otherwise the result with the smallest (case-sensitive)
 * Levenshtein distance to {@code name} is used.
 *
 * @param name           the person name to search for
 * @param throwTempError when {@code true}, a temporary TMDb error is rethrown as
 *                       {@code TemporaryUnavailableException} instead of being logged
 * @return the id as a string, or {@code null} when the search returned nothing or failed
 */
public String getPersonId(String name, boolean throwTempError) {
    String id = null;
    PersonFind bestCandidate = null;
    int bestDistance = Integer.MAX_VALUE;
    boolean includeAdult = configService.getBooleanProperty("themoviedb.includeAdult", Boolean.FALSE);

    try {
        ResultList<PersonFind> results = tmdbApi.searchPeople(name, 0, includeAdult, SearchType.PHRASE);
        LOG.info("{}: Found {} results", name, results.getResults().size());

        for (PersonFind candidate : results.getResults()) {
            if (name.equalsIgnoreCase(candidate.getName())) {
                // Exact (case-insensitive) hit: no need to look any further.
                id = String.valueOf(candidate.getId());
                break;
            }

            LOG.trace("{}: Checking against '{}'", name, candidate.getName());
            int distance = StringUtils.getLevenshteinDistance(name, candidate.getName());
            LOG.trace("{}: Current closest match is {}, this match is {}", name, bestDistance, distance);

            if (distance < bestDistance) {
                LOG.trace("{}: TMDB ID {} is a better match ", name, candidate.getId());
                bestDistance = distance;
                bestCandidate = candidate;
            }
        }

        // id is only assigned inside the loop on an exact match.
        if (id != null) {
            LOG.debug("{}: Matched against TMDB ID: {}", name, id);
        } else if (bestCandidate != null) {
            id = String.valueOf(bestCandidate.getId());
            LOG.debug("{}: Closest match is '{}' differing by {} characters", name, bestCandidate.getName(),
                    bestDistance);
        } else {
            LOG.debug("{}: No match found", name);
        }
    } catch (MovieDbException ex) {
        if (throwTempError && ResponseTools.isTemporaryError(ex)) {
            throw new TemporaryUnavailableException(
                    "TheMovieDb service temporary not available: " + ex.getResponseCode(), ex);
        }
        LOG.error("Failed retrieving TMDb id for person '{}': {}", name, ex.getMessage());
        LOG.trace("TheMovieDb error", ex);
    }

    return id;
}
From source file:org.yamj.core.service.plugin.TheMovieDbScanner.java
@Override public String getPersonId(String name) { String id = ""; com.omertron.themoviedbapi.model.Person closestPerson = null; int closestMatch = Integer.MAX_VALUE; boolean foundPerson = Boolean.FALSE; boolean includeAdult = configService.getBooleanProperty("themoviedb.includeAdult", Boolean.FALSE); try {//from w ww. ja v a 2 s . com TmdbResultsList<com.omertron.themoviedbapi.model.Person> results = tmdbApi.searchPeople(name, includeAdult, 0); LOG.info("{}: Found {} results", name, results.getResults().size()); for (com.omertron.themoviedbapi.model.Person person : results.getResults()) { if (name.equalsIgnoreCase(person.getName())) { id = String.valueOf(person.getId()); foundPerson = Boolean.TRUE; break; } else { LOG.trace("{}: Checking against '{}'", name, person.getName()); int lhDistance = StringUtils.getLevenshteinDistance(name, person.getName()); LOG.trace("{}: Current closest match is {}, this match is {}", name, closestMatch, lhDistance); if (lhDistance < closestMatch) { LOG.trace("{}: TMDB ID {} is a better match ", name, person.getId()); closestMatch = lhDistance; closestPerson = person; } } } if (foundPerson) { LOG.debug("{}: Matched against TMDB ID: {}", name, id); } else if (closestMatch < Integer.MAX_VALUE && closestPerson != null) { id = String.valueOf(closestPerson.getId()); LOG.debug("{}: Closest match is '{}' differing by {} characters", name, closestPerson.getName(), closestMatch); } else { LOG.debug("{}: No match found", name); } } catch (MovieDbException ex) { LOG.warn("Failed to get information on '{}' from {}, error: {}", name, SCANNER_ID, ex.getMessage()); } return id; }
From source file:org.zeroturnaround.isjrebel.IsJRebel.java
/**
 * Tells whether the input, read forwards or backwards, is close enough to {@code JREBEL}.
 *
 * <p>Both the input and its reversal are passed through {@code dropPunctuation} and
 * {@code l33tReplace}. The result is {@code true} if either normalized candidate equals
 * {@code JREBEL} ignoring case or is within a Levenshtein distance of 1 from it.
 *
 * @param input the text to test
 * @return {@code true} if either candidate matches, {@code false} otherwise
 */
public static boolean isJRebel(String input) {
    return Stream.of(input, reverse(input))
            .map(IsJRebel::dropPunctuation)
            .map(IsJRebel::l33tReplace)
            // anyMatch expresses "does any element satisfy this" directly and short-circuits,
            // replacing the filter(...).findAny().isPresent() chain.
            .anyMatch(s -> JREBEL.equalsIgnoreCase(s) || StringUtils.getLevenshteinDistance(JREBEL, s) <= 1);
}
From source file:pl.piotr.TessOCR.java
/** * * @param img// w ww. j a v a2 s .c om * @return */ public static Receipt recognizeReceipt(File img) { int minEditLength = 100; Receipt receipt = null; try { String text = ocr.doOCR(img).toUpperCase(); System.out.println(text); Scanner scaner = new Scanner(text); String line = scaner.nextLine(); int tmp = 0; int LD; for (int i = 0; i < shopHeaderList.size(); i++) { LD = StringUtils.getLevenshteinDistance(line, shopHeaderList.get(i)); if (LD < minEditLength) { minEditLength = LD; tmp = i; } //System.out.println(LD); } //System.out.println(tmp); switch (tmp) { case 0: receipt = new Biedronka(); break; case 1: receipt = new Lidl(); break; case 2: receipt = new Tesco(); break; case 3: receipt = new Zabka(); break; } receipt.setDate(text); receipt.setProductList(text); receipt.setSum(text); } catch (TesseractException ex) { Logger.getLogger(TessOCR.class.getName()).log(Level.SEVERE, null, ex); } return receipt; }
From source file:principal.Main.java
public static void main(String[] args) throws UnknownHostException { MongoDBConection db = new MongoDBConection("dbmedicamentos"); String pesquisa = "SUPLEMENTO PARA MEIO DE CULTURA, VANCOMICINA, P P/ RECONSTITUIO, 3 M"; List<BasicDBObject> objects = db.getAllDocs("catmat"); List<LevenshteinRelevance> listaRelevantes = null; int menortxLev = 15; String melhorPalavra = ""; System.out.println("Buscando: " + pesquisa); for (BasicDBObject ob : objects) { // int idxof = ob.get("TIPL_DESCRICAO").toString().indexOf(','); // if(idxof>ob.get("TIPL_DESCRICAO").toString().length()) // idxof=ob.get("TIPL_DESCRICAO").toString().length()-1; String cmpLev = ob.get("TIPL_DESCRICAO").toString().replace(",", ""); // System.out.println("codigo: "+ob.getString("TIPL_CODIGO").toString()+" "+cmpLev); int txLev = StringUtils.getLevenshteinDistance(pesquisa, cmpLev); if (txLev < 20) { if (txLev < menortxLev) { menortxLev = txLev;/*from ww w. j a va 2 s . co m*/ melhorPalavra = ob.get("TIPL_DESCRICAO").toString(); } // System.out.println("Taxa Levenshtein: "+txLev); // System.out.println(ob.getString("TIPL_CODIGO").toString()+": "+cmpLev); // listaRelevantes.add(new LevenshteinRelevance(txLev, ob)); } } ; System.out.println("Menor valor txlev " + menortxLev + " Melhor texto: " + melhorPalavra); List<Medicamentos> med = db.doAdvancedSearch(melhorPalavra, "catmat"); for (Medicamentos m : med) { System.out.println("FTS search: " + m.getCodigo() + ":" + m.getDescricao()); } }
From source file:qa.aligner.SRLToAligner.java
public Sentence getCorrespondingSentence(String[] tokenizedText, ArrayList<Sentence> sentences) { StringBuffer sbTokenized = new StringBuffer(); sbTokenized.append(String.join(" ", tokenizedText)); StringBuffer sbSentence = new StringBuffer(); double sim = 0.0; for (int i = 0; i < sentences.size(); i++) { Sentence sentence = sentences.get(i); sbSentence.setLength(0);/*from w w w . j a v a 2 s . co m*/ for (int j = 1; j < sentence.size(); j++) { String form = sentence.get(j).getForm(); if (form.equalsIgnoreCase("-RRB-")) { sbSentence.append(") "); } else if (form.equalsIgnoreCase("-LRB-")) { sbSentence.append("( "); } else { sbSentence.append(sentence.get(j).getForm() + " "); } } double distance = StringUtils.getLevenshteinDistance(sbTokenized.toString(), sbSentence.toString()); //System.out.println(distance+ " "+ distance /Math.max(sbTokenized.length(), sbSentence.length())); sim = (1 - (distance / Math.max(sbTokenized.length(), sbSentence.length()))) * 100; //System.out.println("Sent : " + sbSentence.toString()); if (sim > 80) { //System.out.println(sbTokenized.toString()); //System.out.println(sbSentence.toString()); return sentence; } /*if (StringUtils.getLevenshteinDistance(sbTokenized.toString(), sbSentence.toString()) < 0.3 * sbTokenized.toString().length()) { return sentence; }*/ } System.out.println(sbTokenized.toString()); return null; /*for (int i = 0; i < sentences.size(); i++) { Sentence sentence = sentences.get(i); ArrayList<Word> words = new ArrayList<Word>(); for (int j = 1; j < sentence.size(); j++) { words.add(sentence.get(j)); } boolean equal = true; for (int k = 0; k < words.size() - 2; k++) { if (Pattern.matches("\\p{Punct}", tokenizedText[k])) { } else if (!words.get(k).getDeprel().equalsIgnoreCase("punct") && !tokenizedText[k].equalsIgnoreCase(words.get(k).getForm())) { equal = false; break; } } if (equal) { return sentence; } } return null;*/ }