List of usage examples for org.apache.commons.lang3 StringUtils getLevenshteinDistance
public static int getLevenshteinDistance(CharSequence s, CharSequence t)
From source file:org.lambda3.indra.filter.DistanceStringFilter.java
/**
 * Decides whether two terms count as a match for this filter.
 *
 * <p>A match requires both that {@code t1} has at least {@code threshold}
 * characters and that the Levenshtein distance between the two terms is
 * strictly smaller than {@code min}. The distance is only computed when the
 * length requirement is met.
 *
 * @param t1 first term; its length is checked against the threshold
 * @param t2 second term
 * @return {@code true} when both conditions hold
 */
@Override
public boolean matches(String t1, String t2) {
    boolean longEnough = t1.length() >= this.threshold;
    if (longEnough) {
        int editDistance = StringUtils.getLevenshteinDistance(t1, t2);
        return editDistance < min;
    }
    return false;
}
From source file:org.lanes.utility.string.FuzzyMatcher.java
public static double stringSim(String str1, String str2) { double curveconstant = 0.2;//0.05 (close to straight line) double scaleconstant = 2.71799; str1 = str1.toLowerCase();//from w ww . j a v a2 s . c o m str2 = str2.toLowerCase(); int edist = StringUtils.getLevenshteinDistance(str1, str2); double sim = 0; if (edist == 0) { sim = 1; } else { sim = Math.exp(-(Math.pow(edist, curveconstant))) * scaleconstant; } return sim; }
From source file:org.languagetool.dev.wordsimilarity.SimilarWordFinder.java
private List<SimWord> findSimilarWordsFor(DirectoryReader reader, String word, TopDocs topDocs) throws IOException { List<SimWord> result = new ArrayList<>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { String simWord = reader.document(scoreDoc.doc).get("word"); //System.out.println(" sim: " + simWord); if (!simWord.equalsIgnoreCase(word) && !knownPairs.contains(simWord, word)) { int firstDiffPos = getDiffPos(simWord.toLowerCase(), word.toLowerCase()); int limit = Math.min(word.length(), simWord.length()) - 1; if (firstDiffPos > limit) { //System.out.println("FILTERED: " + word + " -> " + simWord + " [" + firstDiffPos + " <= " + limit + "]"); } else { int dist = StringUtils.getLevenshteinDistance(word, simWord); if (dist <= MAX_DIST) { //System.out.println(word + " -> " + simWord + " [" + firstDiffPos + "]"); result.add(new SimWord(simWord, dist)); }//from w w w . j av a 2 s .c o m } knownPairs.add(simWord, word); } } return result; }
From source file:org.languagetool.rules.de.SimilarNameRule.java
@Nullable private String similarName(String nameHere, Set<String> namesSoFar) { for (String name : namesSoFar) { if (name.equals(nameHere)) { continue; }// w w w. j a va2s .com int lenDiff = Math.abs(name.length() - nameHere.length()); boolean nameEndsWithS = name.endsWith("s") && !nameHere.endsWith("s"); boolean otherNameEndsWithS = !name.endsWith("s") && nameHere.endsWith("s"); boolean nameEndsWithN = name.endsWith("n") && !nameHere.endsWith("n"); // probably a dative boolean otherNameEndsWithN = !name.endsWith("n") && nameHere.endsWith("n"); if (nameEndsWithS || otherNameEndsWithS || nameEndsWithN || otherNameEndsWithN) { // we assume this is a genitive, e.g. "Angela Merkels Ehemann" continue; } if (lenDiff <= MAX_DIFF && StringUtils.getLevenshteinDistance(name, nameHere) <= MAX_DIFF) { return name; } } return null; }
From source file:org.languagetool.rules.spelling.SpellingCheckRule.java
@Experimental protected List<String> reorderSuggestions(List<String> suggestions, String word) { // WORK IN PROGRESS if (languageModel == null) { return suggestions; }// w w w .ja v a 2s . c om BaseLanguageModel lm = (BaseLanguageModel) languageModel; List<Integer> levenshteinDistances = suggestions.stream() .map(suggestion -> StringUtils.getLevenshteinDistance(word, suggestion)) .collect(Collectors.toList()); List<Long> frequencies = suggestions.stream().map(lm::getCount).collect(Collectors.toList()); Long frequenciesSum = frequencies.stream().reduce((a, b) -> a + b).orElse(1L); List<Float> normalizedFrequencies = frequencies.stream().map(f -> (float) f / frequenciesSum) .collect(Collectors.toList()); System.out.println("frequencies: " + frequencies + " / normalized: " + normalizedFrequencies); List<Pair<String, Float>> scoredSuggestions = new ArrayList<>(suggestions.size()); for (int i = 0; i < suggestions.size(); i++) { float score = (1f / normalizedFrequencies.get(i)) * levenshteinDistances.get(i); scoredSuggestions.add(Pair.of(suggestions.get(i), score)); } scoredSuggestions.sort(Comparator.comparing(Pair::getRight)); System.out.println( "Before reordering: " + suggestions.subList(0, 5) + " / After: " + scoredSuggestions.subList(0, 5)); return scoredSuggestions.stream().map(Pair::getLeft).collect(Collectors.toList()); }
From source file:org.lanternpowered.server.data.manipulator.gen.DataManipulatorGenerator.java
/**
 * Pairs every method with the required key whose id is closest to the
 * method's snake_case name.
 *
 * <p>Closeness is the Levenshtein distance between the converted method name
 * and the key id with everything up to and including the first ':' removed.
 * On equal distances the key encountered first wins.
 *
 * @param methods the methods to find keys for
 * @param requiredKeys the keys to choose from
 * @return an array holding, at each index, the key chosen for the method at
 *         the same index of {@code methods}
 * @throws IllegalStateException if no key could be chosen for a method
 */
@SuppressWarnings("unchecked")
private static Key[] findKeyMatches(List<Method> methods, Set<Key<?>> requiredKeys) {
    final Key[] matches = new Key[methods.size()];
    int slot = 0;
    for (Method method : methods) {
        final String snakeName = DataHelper.camelToSnake(method.getName());
        Key best = null;
        int bestDistance = Integer.MAX_VALUE;
        for (Key candidate : requiredKeys) {
            final String fullId = candidate.getId();
            final int colon = fullId.indexOf(':');
            // Compare against the part after the first ':' when one is present.
            final String localId = colon == -1 ? fullId : fullId.substring(colon + 1);
            final int distance = StringUtils.getLevenshteinDistance(snakeName, localId);
            if (distance < bestDistance) {
                bestDistance = distance;
                best = candidate;
            }
        }
        if (best == null) {
            throw new IllegalStateException("No key match could be found for the method: " + method);
        }
        matches[slot++] = best;
    }
    return matches;
}
From source file:org.linqs.psl.utils.textsimilarity.LevenshteinSimilarity.java
@Override public double getValue(ReadableDatabase db, Constant... args) { String a = ((StringAttribute) args[0]).getValue(); String b = ((StringAttribute) args[1]).getValue(); int maxLen = Math.max(a.length(), b.length()); if (maxLen == 0) return 1.0; double ldist = StringUtils.getLevenshteinDistance(a, b); double sim = 1.0 - (ldist / maxLen); if (sim > similarityThreshold) return sim; return 0.0;//from ww w. j a v a2 s .c om }
From source file:org.mousephenotype.www.testing.model.TestUtils.java
/** * Returns the closest match to <code>stringToMatch</code> in * <code>set</code>//w w w . j a v a2s . c o m * * @param set the set to search * * @param stringToMatch the string to match * * @return the closest match to <code>stringToMatch</code> in <code>set</code> */ public static String closestMatch(Set<String> set, String stringToMatch) { String matchedString = ""; Integer matchedScore = null; if ((set == null) || (stringToMatch == null)) return matchedString; Iterator<String> it = set.iterator(); while (it.hasNext()) { String candidate = it.next(); int candidateScore = StringUtils.getLevenshteinDistance(candidate, stringToMatch); if (matchedString.isEmpty()) { // First time through, populate matchedXxx. matchedString = candidate; matchedScore = candidateScore; } else { if ((candidateScore >= 0) && (candidateScore < matchedScore)) { matchedScore = candidateScore; matchedString = candidate; } } } return matchedString; }
From source file:org.onexus.website.api.pages.browser.BrowserPageStatus.java
@Override public void decodeParameters(PageParameters parameters, String keyPrefix) { StringValue currentTabId = parameters.get(keyPrefix + "tab"); if (!currentTabId.isEmpty()) { this.currentTabId = currentTabId.toString(); // Check that is a valid tabId if (getConfig().getTab(this.currentTabId) == null) { // Look for the more similar tab id List<TabConfig> tabs = new ArrayList<TabConfig>(getConfig().getTabs()); Collections.sort(tabs, new Comparator<TabConfig>() { @Override//from w ww. j a v a 2 s.c om public int compare(TabConfig o1, TabConfig o2) { Integer v1 = StringUtils.getLevenshteinDistance(BrowserPageStatus.this.currentTabId, o1.getId()); Integer v2 = StringUtils.getLevenshteinDistance(BrowserPageStatus.this.currentTabId, o2.getId()); return v1.compareTo(v2); } }); this.currentTabId = tabs.get(0).getId(); } } StringValue currentView = parameters.get(keyPrefix + "view"); if (!currentView.isEmpty()) { this.currentView = currentView.toString(); // Check that is a valid currentView if (getConfig().getTab(this.currentTabId).getView(this.currentView) == null) { // Look for the more similar view id List<ViewConfig> views = new ArrayList<ViewConfig>( getConfig().getTab(this.currentTabId).getViews()); if (views.size() > 1) { Collections.sort(views, new Comparator<ViewConfig>() { @Override public int compare(ViewConfig o1, ViewConfig o2) { Integer v1 = StringUtils.getLevenshteinDistance(BrowserPageStatus.this.currentView, o1.getTitle()); Integer v2 = StringUtils.getLevenshteinDistance(BrowserPageStatus.this.currentView, o2.getTitle()); return v1.compareTo(v2); } }); } this.currentView = views.get(0).getTitle(); } } selections = new ArrayList<IEntitySelection>(); List<StringValue> values = parameters.getValues(keyPrefix + "f"); if (!values.isEmpty()) { for (StringValue value : values) { SingleEntitySelection fe = new SingleEntitySelection(); fe.loadUrlPrameter(value.toString()); addEntitySelection(fe); } } values = parameters.getValues(keyPrefix + "fc"); if 
(!values.isEmpty()) { for (StringValue value : values) { MultipleEntitySelection fe = new MultipleEntitySelection(); fe.loadUrlPrameter(value.toString()); addEntitySelection(fe); } } super.decodeParameters(parameters, keyPrefix); //To change body of overridden methods use File | Settings | File Templates. }
From source file:org.starnub.utilities.strings.StringUtilities.java
/** * * This will search for words and replace them if they match within a certain percentage * * @param s String to be searched/* w w w. j a va 2s .c om*/ * @param wordToSearch String the word to be matched * @param percentToMatch double the percent to match * @param replaceWholeWord boolean replace the word with characters (true) or not (false) * @param replacementChar String the character to replace the word with * @return String the cleaned string */ public static String wordSearchReplacement(String s, String wordToSearch, double percentToMatch, boolean replaceWholeWord, String replacementChar) { double highEstMatch = 0; boolean firstTime = true; while (highEstMatch > percentToMatch || firstTime) { firstTime = false; int start = 0; int end = wordToSearch.length(); int s2Len = s.length(); int highStart = 0; int highEnd = 0; highEstMatch = 0; if (end <= s2Len) { boolean replace = false; while (end <= s2Len) { int stringsToChange = StringUtils.getLevenshteinDistance(wordToSearch.toLowerCase(), s.substring(start, end).toLowerCase()); double percentMatched = (100 - ((stringsToChange * 100) / end)); if (percentMatched >= percentToMatch && highEstMatch < percentMatched) { highEstMatch = percentMatched / end; highStart = start; highEnd = end; replace = true; } start++; end++; } String rS = ""; if (replaceWholeWord) { rS = StringUtils.repeat(replacementChar, s.substring(highStart, highEnd).length()); } } } return s; }