Find the Levenshtein distance between two Strings.
getLevenshteinDistance(null, *)             = IllegalArgumentException
getLevenshteinDistance(*, null)             = IllegalArgumentException
getLevenshteinDistance("","")               = 0
getLevenshteinDistance("","a")              = 1
getLevenshteinDistance("aaapppp", "")       = 7
getLevenshteinDistance("frog", "fog")       = 1
getLevenshteinDistance("fly", "ant")        = 3
getLevenshteinDistance("elephant", "hippo") = 7
getLevenshteinDistance("hippo", "elephant") = 7
getLevenshteinDistance("hippo", "zzzzzzzz") = 8
getLevenshteinDistance("hello", "hallo")    = 1
public class Main {

    /**
     * Demo entry point: prints the Levenshtein distance between two sample strings.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception declared for signature compatibility; nothing here throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        String s = "demo2s.com";
        String t = "demo";
        System.out.println(getLevenshteinDistance(s, t));
    }

    /**
     * Computes the Levenshtein (edit) distance between two strings: the minimum
     * number of single-character insertions, deletions and substitutions needed
     * to turn one string into the other.
     *
     * <p>Only two rows of the dynamic-programming table are kept at any time,
     * and the shorter string is laid out along the row, so the working memory
     * is two {@code int} arrays of length {@code min(s.length(), t.length()) + 1}
     * rather than a full matrix.
     *
     * @param s the first string, must not be {@code null}
     * @param t the second string, must not be {@code null}
     * @return the edit distance between {@code s} and {@code t}
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static int getLevenshteinDistance(String s, String t) {
        if (s == null || t == null) {
            throw new IllegalArgumentException("Strings must not be null");
        }

        // Put the shorter string on the column axis so the row buffers are as small as possible.
        final String shorter;
        final String longer;
        if (s.length() > t.length()) {
            shorter = t;
            longer = s;
        } else {
            shorter = s;
            longer = t;
        }
        final int columns = shorter.length();
        final int rows = longer.length();

        // If the shorter string is empty, the distance is just the length of the other one.
        if (columns == 0) {
            return rows;
        }

        int[] previousRow = new int[columns + 1];
        int[] currentRow = new int[columns + 1];

        // Row 0: turning the empty prefix of 'longer' into each prefix of 'shorter'.
        for (int col = 0; col <= columns; col++) {
            previousRow[col] = col;
        }

        for (int row = 1; row <= rows; row++) {
            final char longerChar = longer.charAt(row - 1);
            currentRow[0] = row;

            for (int col = 1; col <= columns; col++) {
                final int substitutionCost = (shorter.charAt(col - 1) == longerChar) ? 0 : 1;
                final int fromLeft = currentRow[col - 1] + 1;
                final int fromAbove = previousRow[col] + 1;
                final int fromDiagonal = previousRow[col - 1] + substitutionCost;
                currentRow[col] = Math.min(Math.min(fromLeft, fromAbove), fromDiagonal);
            }

            // Swap the buffers instead of copying: the row just filled becomes 'previous'.
            final int[] recycled = previousRow;
            previousRow = currentRow;
            currentRow = recycled;
        }

        // After the final swap the most recently computed row lives in previousRow.
        return previousRow[columns];
    }
}

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */