Example usage for org.apache.commons.lang3 StringUtils getLevenshteinDistance

List of usage examples for org.apache.commons.lang3 StringUtils getLevenshteinDistance

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils getLevenshteinDistance.

Prototype

public static int getLevenshteinDistance(CharSequence s, CharSequence t) 

Source Link

Document

<p>Find the Levenshtein distance between two Strings.</p> <p>This is the number of changes needed to change one String into another, where each change is a single character modification (deletion, insertion or substitution).</p> <p>The previous implementation of the Levenshtein distance algorithm was from <a href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a></p> <p>Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError which can occur when my Java implementation is used with very large strings.<br> This implementation of the Levenshtein distance algorithm is from <a href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a></p> <pre> StringUtils.getLevenshteinDistance(null, *) = IllegalArgumentException StringUtils.getLevenshteinDistance(*, null) = IllegalArgumentException StringUtils.getLevenshteinDistance("","") = 0 StringUtils.getLevenshteinDistance("","a") = 1 StringUtils.getLevenshteinDistance("aaapppp", "") = 7 StringUtils.getLevenshteinDistance("frog", "fog") = 1 StringUtils.getLevenshteinDistance("fly", "ant") = 3 StringUtils.getLevenshteinDistance("elephant", "hippo") = 7 StringUtils.getLevenshteinDistance("hippo", "elephant") = 7 StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 StringUtils.getLevenshteinDistance("hello", "hallo") = 1 </pre>

Usage

From source file:org.lambda3.indra.filter.DistanceStringFilter.java

/**
 * Accepts the pair only when {@code t1} has at least {@code threshold} characters and the
 * Levenshtein distance between {@code t1} and {@code t2} is strictly below {@code min}.
 */
@Override
public boolean matches(String t1, String t2) {
    // Too-short first terms are rejected before the distance is computed.
    if (t1.length() < this.threshold) {
        return false;
    }
    return StringUtils.getLevenshteinDistance(t1, t2) < min;
}

From source file:org.lanes.utility.string.FuzzyMatcher.java

/**
 * Scores how similar two strings are, ignoring case.
 *
 * <p>Both inputs are lower-cased, their Levenshtein distance {@code d} is computed, and the
 * score is {@code 1} when {@code d == 0}, otherwise {@code exp(-(d ^ 0.2)) * 2.71799}.</p>
 *
 * @param str1 first string
 * @param str2 second string
 * @return the similarity score
 */
public static double stringSim(String str1, String str2) {
    final double curveExponent = 0.2; // 0.05 would be close to a straight line
    final double scaleFactor = 2.71799;

    final int editDistance = StringUtils.getLevenshteinDistance(str1.toLowerCase(), str2.toLowerCase());
    if (editDistance == 0) {
        return 1;
    }
    return Math.exp(-(Math.pow(editDistance, curveExponent))) * scaleFactor;
}

From source file:org.languagetool.dev.wordsimilarity.SimilarWordFinder.java

/**
 * Collects the words from the given search hits that are close to {@code word}.
 *
 * <p>A hit is skipped when it equals {@code word} ignoring case or when the pair is already in
 * {@code knownPairs}. Otherwise the pair is recorded in {@code knownPairs}, and the hit is added
 * to the result if the first differing position lies within the shorter word and the
 * Levenshtein distance is at most {@code MAX_DIST}.</p>
 *
 * @param reader  index reader used to load each hit's "word" field
 * @param word    the word to find neighbours for
 * @param topDocs the search hits to inspect
 * @return the similar words together with their distance to {@code word}
 * @throws IOException if a document cannot be read
 */
private List<SimWord> findSimilarWordsFor(DirectoryReader reader, String word, TopDocs topDocs)
        throws IOException {
    List<SimWord> similarWords = new ArrayList<>();
    for (ScoreDoc hit : topDocs.scoreDocs) {
        String candidate = reader.document(hit.doc).get("word");
        if (candidate.equalsIgnoreCase(word) || knownPairs.contains(candidate, word)) {
            continue;
        }
        int firstDiffPos = getDiffPos(candidate.toLowerCase(), word.toLowerCase());
        int limit = Math.min(word.length(), candidate.length()) - 1;
        // Pairs whose first difference lies beyond the shorter word are filtered out.
        if (firstDiffPos <= limit) {
            int dist = StringUtils.getLevenshteinDistance(word, candidate);
            if (dist <= MAX_DIST) {
                similarWords.add(new SimWord(candidate, dist));
            }
        }
        // Remember the pair either way so it is not evaluated again.
        knownPairs.add(candidate, word);
    }
    return similarWords;
}

From source file:org.languagetool.rules.de.SimilarNameRule.java

/**
 * Looks for a previously seen name that is nearly, but not exactly, the same as {@code nameHere}.
 *
 * @param nameHere   the name to compare
 * @param namesSoFar the names seen so far
 * @return the first name whose length difference and Levenshtein distance to {@code nameHere}
 *         are both at most {@code MAX_DIFF}, or {@code null} if there is none
 */
@Nullable
private String similarName(String nameHere, Set<String> namesSoFar) {
    for (String candidate : namesSoFar) {
        if (candidate.equals(nameHere)) {
            continue;
        }
        // Exactly one of the two names ends with "s": we assume this is a genitive,
        // e.g. "Angela Merkels Ehemann".
        boolean onlyOneEndsWithS = candidate.endsWith("s") != nameHere.endsWith("s");
        // Exactly one of the two names ends with "n": probably a dative.
        boolean onlyOneEndsWithN = candidate.endsWith("n") != nameHere.endsWith("n");
        if (onlyOneEndsWithS || onlyOneEndsWithN) {
            continue;
        }
        int lengthGap = Math.abs(candidate.length() - nameHere.length());
        if (lengthGap > MAX_DIFF) {
            continue;
        }
        if (StringUtils.getLevenshteinDistance(candidate, nameHere) <= MAX_DIFF) {
            return candidate;
        }
    }
    return null;
}

From source file:org.languagetool.rules.spelling.SpellingCheckRule.java

/**
 * Reorders spelling suggestions by combining their language-model frequency with their
 * Levenshtein distance to the misspelled word. Work in progress.
 *
 * <p>Each suggestion is scored as {@code (1 / normalizedFrequency) * distance} and the list is
 * sorted by ascending score. If no language model is set, the input list is returned as is.</p>
 *
 * @param suggestions the suggestions to reorder
 * @param word        the word the suggestions were generated for
 * @return the suggestions ordered by ascending score
 */
@Experimental
protected List<String> reorderSuggestions(List<String> suggestions, String word) {
    // WORK IN PROGRESS
    if (languageModel == null) {
        return suggestions;
    }
    BaseLanguageModel lm = (BaseLanguageModel) languageModel;
    List<Integer> levenshteinDistances = suggestions.stream()
            .map(suggestion -> StringUtils.getLevenshteinDistance(word, suggestion))
            .collect(Collectors.toList());
    List<Long> frequencies = suggestions.stream().map(lm::getCount).collect(Collectors.toList());
    Long frequenciesSum = frequencies.stream().reduce((a, b) -> a + b).orElse(1L);
    List<Float> normalizedFrequencies = frequencies.stream().map(f -> (float) f / frequenciesSum)
            .collect(Collectors.toList());
    System.out.println("frequencies: " + frequencies + " / normalized: " + normalizedFrequencies);

    List<Pair<String, Float>> scoredSuggestions = new ArrayList<>(suggestions.size());
    for (int i = 0; i < suggestions.size(); i++) {
        // NOTE(review): a zero frequency yields an infinite factor here, and NaN when the
        // distance is also 0 - confirm whether such suggestions need special handling.
        float score = (1f / normalizedFrequencies.get(i)) * levenshteinDistances.get(i);
        scoredSuggestions.add(Pair.of(suggestions.get(i), score));
    }
    scoredSuggestions.sort(Comparator.comparing(Pair::getRight));

    // Clamp the preview size: subList(0, 5) throws IndexOutOfBoundsException when fewer than
    // five suggestions are available.
    int previewSize = Math.min(5, suggestions.size());
    System.out.println(
            "Before reordering: " + suggestions.subList(0, previewSize) + " / After: "
                    + scoredSuggestions.subList(0, previewSize));

    return scoredSuggestions.stream().map(Pair::getLeft).collect(Collectors.toList());
}

From source file:org.lanternpowered.server.data.manipulator.gen.DataManipulatorGenerator.java

/**
 * Pairs every method with the required key whose id is closest to the method's name.
 *
 * <p>The method name is converted with {@code DataHelper.camelToSnake}; of each key id only the
 * part after the first {@code ':'} (or the whole id if there is none) is compared. Closeness is
 * the Levenshtein distance; on ties the key encountered first wins.</p>
 *
 * @param methods      the methods to find keys for
 * @param requiredKeys the candidate keys
 * @return an array with, at index {@code i}, the key matched to {@code methods.get(i)}
 * @throws IllegalStateException if no key could be matched to a method
 */
@SuppressWarnings("unchecked")
private static Key[] findKeyMatches(List<Method> methods, Set<Key<?>> requiredKeys) {
    final Key[] matches = new Key[methods.size()];
    int slot = 0;
    for (final Method method : methods) {
        final String methodName = DataHelper.camelToSnake(method.getName());

        Key bestKey = null;
        int bestDistance = Integer.MAX_VALUE;
        for (Key candidate : requiredKeys) {
            final String fullId = candidate.getId();
            // indexOf returns -1 when there is no ':', so this keeps the whole id in that case.
            final String plainId = fullId.substring(fullId.indexOf(':') + 1);
            final int distance = StringUtils.getLevenshteinDistance(methodName, plainId);
            if (distance < bestDistance) {
                bestDistance = distance;
                bestKey = candidate;
            }
        }
        if (bestKey == null) {
            throw new IllegalStateException("No key match could be found for the method: " + method);
        }

        matches[slot++] = bestKey;
    }
    return matches;
}

From source file:org.linqs.psl.utils.textsimilarity.LevenshteinSimilarity.java

/**
 * Computes a normalized Levenshtein similarity between the two string arguments.
 *
 * <p>The similarity is {@code 1 - distance / max(length)}. Two empty strings score {@code 1.0};
 * a similarity that does not exceed {@code similarityThreshold} is reported as {@code 0.0}.</p>
 *
 * @param db   unused by this implementation
 * @param args the two values to compare; both are cast to {@code StringAttribute}
 * @return the similarity, or {@code 0.0} if it is not above the threshold
 */
@Override
public double getValue(ReadableDatabase db, Constant... args) {
    final String first = ((StringAttribute) args[0]).getValue();
    final String second = ((StringAttribute) args[1]).getValue();

    final int longest = Math.max(first.length(), second.length());
    if (longest == 0) {
        // Both strings are empty, so there is nothing to normalize by.
        return 1.0;
    }

    final double distance = StringUtils.getLevenshteinDistance(first, second);
    final double similarity = 1.0 - (distance / longest);

    return similarity > similarityThreshold ? similarity : 0.0;
}

From source file:org.mousephenotype.www.testing.model.TestUtils.java

/**
 * Returns the closest match to <code>stringToMatch</code> in <code>set</code>, measured by
 * Levenshtein distance. When several elements are equally close, the one returned first by the
 * set's iterator wins. <code>null</code> elements are skipped.
 *
 * @param set the set to search
 *
 * @param stringToMatch the string to match
 *
 * @return the closest match to <code>stringToMatch</code> in <code>set</code>, or an empty
 *         string if either argument is <code>null</code> or the set has no non-null element
 */
public static String closestMatch(Set<String> set, String stringToMatch) {
    if ((set == null) || (stringToMatch == null)) {
        return "";
    }

    String matchedString = "";
    int matchedScore = 0;
    // Track "nothing matched yet" explicitly: testing matchedString.isEmpty() would treat an
    // empty-string candidate as no match and let any later candidate overwrite it.
    boolean matchFound = false;

    for (String candidate : set) {
        if (candidate == null) {
            // getLevenshteinDistance throws IllegalArgumentException for null input.
            continue;
        }
        int candidateScore = StringUtils.getLevenshteinDistance(candidate, stringToMatch);
        if (!matchFound || (candidateScore < matchedScore)) {
            matchedString = candidate;
            matchedScore = candidateScore;
            matchFound = true;
        }
    }

    return matchedString;
}

From source file:org.onexus.website.api.pages.browser.BrowserPageStatus.java

/**
 * Restores this page status from URL parameters.
 *
 * <p>Reads, each prefixed with {@code keyPrefix}: {@code tab} (current tab id), {@code view}
 * (current view title), {@code f} (single-entity selections) and {@code fc} (multiple-entity
 * selections). An unknown tab id or view title is replaced by the configured one with the
 * smallest Levenshtein distance to the requested value. Finally delegates to the superclass.</p>
 *
 * @param parameters the page parameters to decode
 * @param keyPrefix  prefix prepended to every parameter name
 */
@Override
public void decodeParameters(PageParameters parameters, String keyPrefix) {

    final StringValue tabParam = parameters.get(keyPrefix + "tab");
    if (!tabParam.isEmpty()) {
        this.currentTabId = tabParam.toString();

        // Unknown tab id: fall back to the configured tab whose id is most similar.
        if (getConfig().getTab(this.currentTabId) == null) {
            final List<TabConfig> candidateTabs = new ArrayList<TabConfig>(getConfig().getTabs());
            Collections.sort(candidateTabs, new Comparator<TabConfig>() {
                @Override
                public int compare(TabConfig left, TabConfig right) {
                    final String requested = BrowserPageStatus.this.currentTabId;
                    final int leftDistance = StringUtils.getLevenshteinDistance(requested, left.getId());
                    final int rightDistance = StringUtils.getLevenshteinDistance(requested, right.getId());
                    return leftDistance < rightDistance ? -1 : (leftDistance == rightDistance ? 0 : 1);
                }
            });
            this.currentTabId = candidateTabs.get(0).getId();
        }
    }

    final StringValue viewParam = parameters.get(keyPrefix + "view");
    if (!viewParam.isEmpty()) {
        this.currentView = viewParam.toString();

        // Unknown view title: fall back to the most similar view of the current tab.
        if (getConfig().getTab(this.currentTabId).getView(this.currentView) == null) {
            final List<ViewConfig> candidateViews = new ArrayList<ViewConfig>(
                    getConfig().getTab(this.currentTabId).getViews());

            // Sorting is pointless for fewer than two views.
            if (candidateViews.size() > 1) {
                Collections.sort(candidateViews, new Comparator<ViewConfig>() {
                    @Override
                    public int compare(ViewConfig left, ViewConfig right) {
                        final String requested = BrowserPageStatus.this.currentView;
                        final int leftDistance = StringUtils.getLevenshteinDistance(requested,
                                left.getTitle());
                        final int rightDistance = StringUtils.getLevenshteinDistance(requested,
                                right.getTitle());
                        return leftDistance < rightDistance ? -1 : (leftDistance == rightDistance ? 0 : 1);
                    }
                });
            }

            this.currentView = candidateViews.get(0).getTitle();
        }
    }

    // Rebuild the entity selections from scratch: "f" entries first, then "fc" entries.
    selections = new ArrayList<IEntitySelection>();
    for (StringValue encoded : parameters.getValues(keyPrefix + "f")) {
        SingleEntitySelection single = new SingleEntitySelection();
        single.loadUrlPrameter(encoded.toString());
        addEntitySelection(single);
    }

    for (StringValue encoded : parameters.getValues(keyPrefix + "fc")) {
        MultipleEntitySelection multiple = new MultipleEntitySelection();
        multiple.loadUrlPrameter(encoded.toString());
        addEntitySelection(multiple);
    }

    super.decodeParameters(parameters, keyPrefix);
}

From source file:org.starnub.utilities.strings.StringUtilities.java

/**
 *
 * This will search for words and replace them if they match within a certain percentage
 *
 * @param s String to be searched/*  w  w  w. j a  va 2s  .c om*/
 * @param wordToSearch String the word to be matched
 * @param percentToMatch double the percent to match
 * @param replaceWholeWord boolean replace the word with characters (true) or not (false)
 * @param replacementChar String the character to replace the word with
 * @return String the cleaned string
 */
public static String wordSearchReplacement(String s, String wordToSearch, double percentToMatch,
        boolean replaceWholeWord, String replacementChar) {
    double highEstMatch = 0;
    boolean firstTime = true;
    while (highEstMatch > percentToMatch || firstTime) {
        firstTime = false;
        int start = 0;
        int end = wordToSearch.length();
        int s2Len = s.length();
        int highStart = 0;
        int highEnd = 0;
        highEstMatch = 0;
        if (end <= s2Len) {
            boolean replace = false;
            while (end <= s2Len) {
                int stringsToChange = StringUtils.getLevenshteinDistance(wordToSearch.toLowerCase(),
                        s.substring(start, end).toLowerCase());
                double percentMatched = (100 - ((stringsToChange * 100) / end));
                if (percentMatched >= percentToMatch && highEstMatch < percentMatched) {
                    highEstMatch = percentMatched / end;
                    highStart = start;
                    highEnd = end;
                    replace = true;
                }
                start++;
                end++;
            }
            String rS = "";
            if (replaceWholeWord) {
                rS = StringUtils.repeat(replacementChar, s.substring(highStart, highEnd).length());
            }
        }
    }
    return s;
}