Java tutorial
//package com.java2s;
/**
 * Copyright 2016, Yahoo Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 **/

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Computes a lexical similarity score between two phrases from the adjacent
 * letter pairs (character bigrams) of their whitespace-separated words.
 */
public class Main {
    /**
     * Scores how similar two phrases are, ignoring case.
     *
     * <p>Each phrase is upper-cased, split into words on whitespace, and every
     * word is broken into its adjacent two-character pairs. The score is
     * {@code 2 * shared / (pairs1 + pairs2)}, where {@code shared} counts pairs
     * common to both phrases, each occurrence being matched at most once.
     *
     * @param phrase1 the first phrase; must not be {@code null}
     * @param phrase2 the second phrase; must not be {@code null}
     * @return lexical similarity value in the range [0,1]; {@code 0.0} when
     *         neither phrase contains a word of two or more characters
     * @throws NullPointerException if either phrase is {@code null}
     */
    public static double lexicalSimilarity(String phrase1, String phrase2) {
        // Locale.ROOT keeps the case mapping independent of the JVM's default
        // locale (e.g. Turkish maps 'i' to a dotted capital I, not 'I').
        List<String> pairs1 = wordLetterPairs(phrase1.toUpperCase(Locale.ROOT));
        List<String> pairs2 = wordLetterPairs(phrase2.toUpperCase(Locale.ROOT));

        int union = pairs1.size() + pairs2.size();
        if (union == 0) {
            // Nothing to compare; avoid 0.0 / 0, which would yield NaN.
            return 0.0;
        }

        // Count the occurrences of each pair in the second phrase so that every
        // occurrence can be matched at most once.
        Map<String, Integer> unmatched = new HashMap<String, Integer>();
        for (String pair : pairs2) {
            Integer count = unmatched.get(pair);
            unmatched.put(pair, count == null ? 1 : count + 1);
        }

        int intersection = 0;
        for (String pair : pairs1) {
            Integer count = unmatched.get(pair);
            if (count != null && count > 0) {
                intersection++;
                unmatched.put(pair, count - 1);
            }
        }

        return (2.0 * intersection) / union;
    }

    /**
     * Collects the adjacent letter pairs of every word in the given string.
     *
     * @param str the text to break up; words are separated by single
     *            whitespace characters
     * @return a list of 2-character Strings, in order of appearance
     */
    private static List<String> wordLetterPairs(String str) {
        List<String> allPairs = new ArrayList<String>();
        // Consecutive whitespace yields empty tokens, which contribute no pairs.
        for (String word : str.split("\\s")) {
            for (String pair : letterPairs(word)) {
                allPairs.add(pair);
            }
        }
        return allPairs;
    }

    /**
     * Breaks a single word into its overlapping two-character substrings.
     *
     * @param str the word to break up
     * @return an array of adjacent letter pairs contained in the input string;
     *         empty when the input has fewer than two characters
     */
    private static String[] letterPairs(String str) {
        if (str.length() < 2) {
            return new String[0];
        }
        int numPairs = str.length() - 1;
        String[] pairs = new String[numPairs];
        for (int i = 0; i < numPairs; i++) {
            pairs[i] = str.substring(i, i + 2);
        }
        return pairs;
    }
}