List of usage examples for org.apache.commons.lang3 StringUtils getLevenshteinDistance
public static int getLevenshteinDistance(CharSequence s, CharSequence t)
From source file:codeu.chat.client.commandline.Chat.java
private boolean parseScript(String link, String phrase, boolean springfield) { String[] script;/*from w ww .ja v a 2 s .c om*/ try { Document doc = Jsoup.connect(link).get(); /* If the script was retrieved from the Springfield website, the lines must be split up using the <br> tag instead of new line characters */ if (springfield) { String temp = Jsoup.parse(doc.html().replaceAll("(?i)<br[^>]*>", "br2n")).text(); script = mergeScriptSentences(temp.split("br2n")); } else { script = mergeScriptSentences(doc.body().text().split("\n")); } /* Search for a line containing the phrase. Once one is found, determine the best response and return accordingly. In some cases, this will mean continuing to search for a later match */ for (int lineNum = 0; lineNum < script.length; lineNum++) { script[lineNum] = script[lineNum].trim().toLowerCase(); for (String sentence : script[lineNum].split("(?<=[!\\?\\.])")) { if (sentence.contains(phrase) || StringUtils.getLevenshteinDistance(sentence, phrase) <= phrase.length() / 3.0) { if (findNextScriptResponse(lineNum, phrase, script)) { return true; } } } } } catch (IOException e) { e.printStackTrace(); } return false; // Return false if no line containing the phrase was found }
From source file:codeu.chat.client.commandline.Chat.java
private boolean findNextScriptResponse(int lineNum, String phrase, String[] script) { /* Check if a line from a different character (i.e. an actual response) can be found. If not, just respond with the very next line of dialogue */ if (canFindNewCharacter(lineNum, script)) { return true; }//from ww w. j av a 2 s . c om /* Next, check if the line containing this phrase contains more wording. If it does, use the next sentence as the response */ if (lineContainsMoreSentences(script[lineNum], phrase)) { return true; } // Advance to the next non-blank line, indicating the next piece of dialogue lineNum = advanceToNextNonBlankLine(lineNum, script); // Ensure that there was a next line of dialogue if (lineNum >= script.length || StringUtils.getLevenshteinDistance(script[lineNum].trim(), phrase) < phrase.length() / 3.0) { return false; } /* Add this response found from an online script to the response map for future reference/usage */ scriptMap.get(MATCHED_PHRASE).add(adjustScriptLine(script[lineNum].trim())); return true; }
From source file:net.stargraph.rank.impl.LevenshteinRanker.java
/**
 * Computes the distance between two character sequences as their Levenshtein edit distance.
 *
 * @param s1 first sequence
 * @param s2 second sequence
 * @return the integer edit distance, widened to {@code double}
 */
@Override
double computeStringDistance(CharSequence s1, CharSequence s2) {
    final int editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    return editDistance;
}
From source file:nl.mvdr.umvc3replayanalyser.ocr.TesseractOCREngine.java
/** * Matches the given string to a character's name. * // w w w. j av a2s . co m * @param text * text to be matched, should be a Marvel character name * @param possibleCharacters * the characters that text may match, may not be empty * @return the character to whose name the given text is closest * @throws OCRException * in case the matching character cannot be uniquely determined */ private Umvc3Character matchToCharacterName(String text, Set<Umvc3Character> possibleCharacters) throws OCRException { if (log.isDebugEnabled()) { if (possibleCharacters.size() == Umvc3Character.values().length) { log.debug(String.format("Attempting to match %s to the UMvC3 characters", text)); } else { log.debug(String.format("Attempting to match %s to the following characters: %s", text, possibleCharacters)); } } // Compute the minimal Levenshtein distance between the given text and the uppercase character names. int minimalDistance = Integer.MAX_VALUE; Set<Umvc3Character> matchingCharacters = EnumSet.noneOf(Umvc3Character.class); for (Umvc3Character character : possibleCharacters) { int distance = StringUtils.getLevenshteinDistance(character.getName().toUpperCase(), text); if (distance < minimalDistance) { minimalDistance = distance; matchingCharacters.clear(); matchingCharacters.add(character); } else if (distance == minimalDistance) { matchingCharacters.add(character); } } // matchingCharacters is not empty, since there must be at least one character with a distance less than // Integer.MAX_INT. Umvc3Character result; if (1 < matchingCharacters.size()) { // More than one match found. result = handleMultipleMatches(text, minimalDistance, matchingCharacters); } else { // Exactly one match, return it. result = matchingCharacters.iterator().next(); } if (log.isDebugEnabled()) { log.debug(String.format("Match found: %s. levenshtein(%s, %s) = %s", result, result.getName().toUpperCase(), text, "" + minimalDistance)); } return result; }
From source file:nl.mvdr.umvc3replayanalyser.video.ReplayAnalyserIntegrationTest.java
/** * Checks the given player name.//from w w w .ja v a2 s .c o m * * @param expected * expected name * @param actual * actual name */ private void assertPlayerName(String expected, String actual) { // OCR'ing player names is hard, so we're going to give Tesseract a break and allow for some mismatched // characters. int distance = StringUtils.getLevenshteinDistance(expected, actual); String message = String.format("Expected \"%s\", got \"%s\", Levenshtein distance: %s", expected, actual, "" + distance); log.info(message); Assert.assertTrue(message, distance < 5); }
From source file:org.apache.hadoop.hive.ql.udf.generic.GenericUDFLevenshtein.java
@Override public Object evaluate(DeferredObject[] arguments) throws HiveException { String str0 = getStringValue(arguments, 0, converters); String str1 = getStringValue(arguments, 1, converters); if (str0 == null || str1 == null) { return null; }//from w w w . jav a2 s . c o m int dist = StringUtils.getLevenshteinDistance(str0, str1); output.set(dist); return output; }
From source file:org.cellcore.code.engine.page.extractor.AbstractEditionsExtractor.java
protected String checkList(String name) { if (name.toLowerCase().contains("foil")) { return null; }// ww w. java2s . c om name = name.replaceAll("(^( )+|( )+$)", ""); for (String key : getEditions().keySet()) { String iname = Normalizer.normalize(name, Normalizer.Form.NFD).toLowerCase().replaceAll("[^\\p{ASCII}]", ""); String kname = Normalizer.normalize(key, Normalizer.Form.NFD).toLowerCase().replaceAll("[^\\p{ASCII}]", ""); int distance = StringUtils.getLevenshteinDistance(iname, kname); boolean numeral = false; boolean numeralProceed = false; if (iname.replaceAll("\\D+", "").length() > 0) { numeral = true; if (iname.replaceAll("\\D+", "").equals(kname.replaceAll("\\D+", ""))) { numeralProceed = true; } } if (((kname.contains(iname) || iname.contains(kname) || distance <= 2) && iname.length() > 3) && (numeral == numeralProceed)) { logger.info("Found " + iname + " " + getEditions().get(key)); return getEditions().get(key); } } return null; }
From source file:org.codarama.haxsync.services.ContactsSyncAdapterService.java
private static String matches(Set<String> phoneContacts, String fbContact, int maxdistance) { if (maxdistance == 0) { if (phoneContacts.contains(fbContact)) { return fbContact; }/*from www .j a v a 2 s . co m*/ return null; //return phoneContacts.contains(fbContact); } int bestDistance = maxdistance; String bestMatch = null; for (String contact : phoneContacts) { int distance = StringUtils.getLevenshteinDistance(contact != null ? contact.toLowerCase() : "", fbContact != null ? fbContact.toLowerCase() : ""); if (distance <= bestDistance) { //Log.i("FOUND MATCH", "Phone Contact: " + contact +" FB Contact: " + fbContact +" distance: " + distance + "max distance: " +maxdistance); bestMatch = contact; bestDistance = distance; } } return bestMatch; }
From source file:org.dbgl.util.StringRelatedUtils.java
public static int findBestMatchIndex(final String search, final String[] titles) { if (titles == null || titles.length == 0) return -1; String s = search.toLowerCase(); int minDistance = Integer.MAX_VALUE; int result = 0; for (int i = 0; i < titles.length; i++) { String title = FilenameUtils.removeExtension(titles[i].toLowerCase()); int distance = (i == 0) ? StringUtils.getLevenshteinDistance(s, title) : StringUtils.getLevenshteinDistance(s, title, minDistance - 1); if (distance == 0) return i; if (distance != -1) { minDistance = distance;// www.ja v a2s . c o m result = i; } } return result; }
From source file:org.krobot.util.MessageUtils.java
/** * Get the most similar message of a list to a base<br><br> * * <b>Example:</b><br><br> * * base = hello<br>/*from ww w . java 2 s . c o m*/ * messages = [haul, hella, yay]<br><br> * * It returns <b>hella</b> * * @param base The base message * @param messages The messages where to get the most similar * * @return The most similar message to the base */ public static String getMostSimilar(String base, String[] messages) { ArrayList<Integer> matches = new ArrayList<>(); for (String message : messages) { matches.add(StringUtils.getLevenshteinDistance(base, message)); } int candidateIndex = 0; int candidate = Integer.MAX_VALUE; for (int i = 0; i < matches.size(); i++) { int entry = matches.get(i); if (entry < candidate) { candidate = entry; candidateIndex = i; } } if (candidate > 10) { return null; } return messages[matches.get(candidateIndex)]; }