Example usage for org.apache.commons.lang3 StringUtils getLevenshteinDistance

List of usage examples for org.apache.commons.lang3 StringUtils getLevenshteinDistance

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils getLevenshteinDistance.

Prototype

public static int getLevenshteinDistance(CharSequence s, CharSequence t) 

Source Link

Document

<p>Find the Levenshtein distance between two Strings.</p> <p>This is the number of changes needed to change one String into another, where each change is a single character modification (deletion, insertion or substitution).</p> <p>The previous implementation of the Levenshtein distance algorithm was from <a href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a></p> <p>Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError which can occur when my Java implementation is used with very large strings.<br> This implementation of the Levenshtein distance algorithm is from <a href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a></p> <pre> StringUtils.getLevenshteinDistance(null, *) = IllegalArgumentException StringUtils.getLevenshteinDistance(*, null) = IllegalArgumentException StringUtils.getLevenshteinDistance("","") = 0 StringUtils.getLevenshteinDistance("","a") = 1 StringUtils.getLevenshteinDistance("aaapppp", "") = 7 StringUtils.getLevenshteinDistance("frog", "fog") = 1 StringUtils.getLevenshteinDistance("fly", "ant") = 3 StringUtils.getLevenshteinDistance("elephant", "hippo") = 7 StringUtils.getLevenshteinDistance("hippo", "elephant") = 7 StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 StringUtils.getLevenshteinDistance("hello", "hallo") = 1 </pre>

Usage

From source file:qa.ProcessFrameProcessor.java

/**
 * Collects the process frames whose question text matches the given question,
 * either exactly (ignoring case and surrounding whitespace) or approximately.
 *
 * <p>A frame matches approximately when the Levenshtein distance between its
 * trimmed question text and the trimmed query is strictly less than 30% of the
 * trimmed query's length. The threshold is derived from the trimmed query so
 * that it is measured on the same string the distance is computed against.
 *
 * @param questionTxt the question to look up; must not be {@code null}
 * @return the matching frames in iteration order of {@code getProcArr()};
 *         empty when nothing matches
 * @throws NullPointerException if {@code questionTxt} is {@code null}
 */
public ArrayList<ProcessFrame> getQuestionFrame(String questionTxt) {
    // The query is loop-invariant, so normalise it once instead of per frame.
    final String query = questionTxt.trim();
    // Strict upper bound on the edit distance accepted as a fuzzy match.
    final double maxDistance = 0.3 * query.length();

    ArrayList<ProcessFrame> results = new ArrayList<ProcessFrame>();
    for (ProcessFrame p : this.getProcArr()) {
        final String candidate = p.getQuestionText().trim();
        if (candidate.equalsIgnoreCase(query)
                || StringUtils.getLevenshteinDistance(candidate, query) < maxDistance) {
            results.add(p);
        }
    }
    return results;
}

From source file:sbu.srl.rolextract.SBURolePredict.java

/**
 * Picks the predicate or argument span of a parse result that best matches
 * {@code targetText}.
 *
 * <p>Selection happens in three steps:
 * <ol>
 *   <li>Among the spans that {@code isOverlapping} reports as overlapping the
 *       target, find the minimum Levenshtein distance to the target.</li>
 *   <li>Collect every predicate and argument whose distance equals that
 *       minimum (predicates first, then arguments).</li>
 *   <li>If several spans tie, return the one with the highest score
 *       ({@code getScore()} for predicates, {@code getArgScore()} for
 *       arguments); the earliest span wins among equal scores.</li>
 * </ol>
 *
 * @param parseResult the parse whose predicates and arguments are candidates
 * @param targetText  the text the returned span should resemble
 * @return the best {@code Predicate} or {@code Argument}, or {@code null}
 *         when no span overlaps the target
 */
public static Object getBestArgument(ParseResult parseResult, String targetText) {
    List<Predicate> predicates = parseResult.getPredicates();
    List<Argument> arguments = new ArrayList<Argument>();
    for (Predicate predicate : predicates) {
        arguments.addAll(predicate.getArguments());
    }

    // Step 1: smallest edit distance among the spans that overlap the target.
    int minimumDistance = Integer.MAX_VALUE;
    boolean overlapping = false;
    for (Predicate predicate : predicates) {
        if (isOverlapping(predicate.getText(), targetText)) {
            minimumDistance = Math.min(minimumDistance,
                    StringUtils.getLevenshteinDistance(targetText, predicate.getText()));
            overlapping = true;
        }
    }
    for (Argument argument : arguments) {
        if (isOverlapping(targetText, argument.getText())) {
            minimumDistance = Math.min(minimumDistance,
                    StringUtils.getLevenshteinDistance(targetText, argument.getText()));
            overlapping = true;
        }
    }
    if (!overlapping) {
        return null; // NONE
    }

    // Step 2: every span at the minimum distance is a candidate.
    // NOTE(review): this pass does not re-check isOverlapping, so a
    // non-overlapping span at the same distance is also collected - confirm
    // that this is intended.
    List<Object> overlappedSpans = new ArrayList<Object>();
    for (Predicate predicate : predicates) {
        if (StringUtils.getLevenshteinDistance(targetText, predicate.getText()) == minimumDistance) {
            overlappedSpans.add(predicate);
        }
    }
    for (Argument argument : arguments) {
        if (StringUtils.getLevenshteinDistance(targetText, argument.getText()) == minimumDistance) {
            overlappedSpans.add(argument);
        }
    }
    if (overlappedSpans.size() == 1) {
        return overlappedSpans.get(0);
    }

    // Step 3: break ties by score. The search is seeded with the first
    // candidate rather than Double.MIN_VALUE: MIN_VALUE is the smallest
    // *positive* double, so zero or negative scores would never beat it and
    // the method would return null even though spans overlap.
    double maxScore = Double.NEGATIVE_INFINITY;
    Object bestSpan = null;
    for (Object obj : overlappedSpans) {
        final double score;
        if (obj instanceof Predicate) {
            score = ((Predicate) obj).getScore();
        } else {
            score = ((Argument) obj).getArgScore();
        }
        if (bestSpan == null || score > maxScore) {
            maxScore = score;
            bestSpan = obj;
        }
    }
    return bestSpan;
}

From source file:Search.DataManipulation.DataValidator.java

/**
 * Heuristically decides whether an application name corresponds to a bundle
 * identifier.
 *
 * <p>The bundle id is split on {@code '.'} and the name on whitespace, both
 * lower-cased. A (bundle element, name word) pair counts as a match when their
 * Levenshtein distance does not exceed the difference of their lengths. Since
 * the distance can never be smaller than that difference, this holds exactly
 * when the shorter token can be turned into the longer one by insertions
 * alone.
 *
 * <p>The name matches when the number of matching pairs is at least the number
 * of name words minus one, and at least one. The lower bound of one prevents a
 * single-word name from matching every bundle id with zero matching pairs.
 * Empty tokens (e.g. from leading whitespace or consecutive dots) are ignored
 * because they would trivially match every counterpart.
 *
 * @param name     the human-readable name, words separated by whitespace
 * @param bundleId the dot-separated bundle identifier
 * @return {@code true} if enough name words match bundle elements
 */
public boolean matchNameBundle(String name, String bundleId) {
    String[] bundleElements = bundleId.toLowerCase().split("\\.");
    String[] nameElements = name.toLowerCase().split("\\s+");
    int matches = 0;

    for (String bundleElement : bundleElements) {
        for (String nameElement : nameElements) {
            // An empty token is at distance == length difference from
            // everything, so it carries no evidence of a match.
            if (bundleElement.isEmpty() || nameElement.isEmpty()) {
                continue;
            }
            int distance = StringUtils.getLevenshteinDistance(bundleElement, nameElement);
            int lengthDistance = Math.abs(nameElement.length() - bundleElement.length());

            if (distance <= lengthDistance) {
                ++matches;
            }
        }
    }

    // Same tolerance as before (all name words but one), but never vacuous.
    int requiredMatches = Math.max(1, nameElements.length - 1);
    return matches >= requiredMatches;
}

From source file:ubc.pavlab.gotrack.beans.Cache.java

/**
 * Orders two strings by how close each is to the reference string
 * {@code compareTo}: the one with the smaller Levenshtein distance sorts first.
 *
 * @param a first string
 * @param b second string
 * @return -1, 0 or 1 when {@code a} is closer than, as close as, or farther
 *         than {@code b} from the reference string
 */
@Override
public int compare(String a, String b) {
    final int distanceOfA = StringUtils.getLevenshteinDistance(a, compareTo);
    final int distanceOfB = StringUtils.getLevenshteinDistance(b, compareTo);
    return Integer.compare(distanceOfA, distanceOfB);
}

From source file:utilities.strings.StringUtilities.java

/**
 * Computes the percentage similarity of two words as
 * {@code 100 - (levenshteinDistance * 100) / averageLength}, where
 * {@code averageLength} is the mean of the two string lengths.
 *
 * <p>The calculation is carried out in floating point, so the result is not
 * truncated to a whole number and cannot overflow for long inputs. Because the
 * distance may exceed the average length (e.g. when one string is much shorter
 * than the other), the result can be negative.
 *
 * @param s String string to be compared against
 * @param s2 String string to compare
 * @return double the percentage of similarity; 100 for identical strings
 * @throws ArithmeticException if both strings are empty, since the average
 *         length is then zero and the ratio is undefined
 */
public static double similarityCalculation(String s, String s2) throws ArithmeticException {
    // Computed first so that invalid (null) input is rejected by StringUtils
    // exactly as before.
    final int distance = StringUtils.getLevenshteinDistance(s, s2);

    final int totalLength = s.length() + s2.length();
    if (totalLength == 0) {
        throw new ArithmeticException("Cannot compute the similarity of two empty strings");
    }

    final double averageLength = totalLength / 2.0;
    return 100.0 - (distance * 100.0) / averageLength;
}

From source file:utils.hashing.similarity.java

/**
 * Compute the similarity between two strings and provide a percentage,
 * doesn't really matter in which order they are compared
 * @param s0    String 1/*  w w w  .j a v a 2  s  . c  o  m*/
 * @param s1    String 2
 * @return  A value ranging from 0 to 100%
 */
public static int levenshteinPercentage(final String s0, final String s1) {
    final int value = StringUtils.getLevenshteinDistance(s0, s1);
    int percentage = (int) (100 - (float) value * 100 / (float) (s0.length() + s1.length()));
    return percentage;
}

From source file:wsattacker.sso.openid.attacker.evaluation.strategies.StringSimilarityCallable.java

/**
 * Computes the Levenshtein distance between {@code s1} and {@code s2}.
 *
 * @return the edit distance, widened to a {@code Float}
 * @throws Exception declared by the overridden method's signature
 */
@Override
public Float call() throws Exception {
    final int editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    return Float.valueOf(editDistance);
}