List of usage examples for the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()
public DoubleMetaphone()
From source file:io.klerch.alexa.tellask.model.AlexaInput.java
/**
 * Tells whether the intent request contains the given slot with a value that is a
 * phonetic sibling of the supplied string. The comparison uses the Double Metaphone
 * algorithm, which is optimized for the English language.
 *
 * @param slotName name of the slot to look after
 * @param value the value the slot value is compared with
 * @return True, if the slot is not blank, the given value is not null and both values
 *         are phonetically equal according to Double Metaphone
 */
public boolean hasSlotIsDoubleMetaphoneEqual(final String slotName, final String value) {
    // The slot value is fetched first, exactly as before, regardless of the checks below.
    final String slotValue = getSlotValue(slotName);
    if (!hasSlotNotBlank(slotName) || value == null) {
        return false;
    }
    final DoubleMetaphone phonetic = new DoubleMetaphone();
    return phonetic.isDoubleMetaphoneEqual(slotValue, value);
}
From source file:com.perceptive.epm.perkolcentral.action.ajax.EmployeeDetailsAction.java
public String executeGetAllEmployees() throws ExceptionWrapper { try {//from www . j a va 2s .com Soundex sndx = new Soundex(); DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); final StringEncoderComparator comparator1 = new StringEncoderComparator(doubleMetaphone); LoggingHelpUtil.printDebug("Page " + getPage() + " Rows " + getRows() + " Sorting Order " + getSord() + " Index Row :" + getSidx()); LoggingHelpUtil.printDebug("Search :" + searchField + " " + searchOper + " " + searchString); // Calcalate until rows ware selected int to = (rows * page); // Calculate the first row to read int from = to - rows; LinkedHashMap<Long, EmployeeBO> employeeLinkedHashMap = new LinkedHashMap<Long, EmployeeBO>(); employeeLinkedHashMap = employeeBL.getAllEmployees(); ArrayList<EmployeeBO> allEmployees = new ArrayList<EmployeeBO>(employeeLinkedHashMap.values()); //Handle search if (searchOper != null && !searchOper.trim().equalsIgnoreCase("") && searchString != null && !searchString.trim().equalsIgnoreCase("")) { if (searchOper.trim().equalsIgnoreCase("eq")) { CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return ((EmployeeBO) o).getEmployeeName().equalsIgnoreCase(searchString.trim()); //To change body of implemented methods use File | Settings | File Templates. 
} }); } else if (searchOper.trim().equalsIgnoreCase("slk")) { CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return (new StringEncoderComparator(new Soundex()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new DoubleMetaphone()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new Metaphone()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new RefinedSoundex()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0); //To change body of implemented methods use File | Settings | File Templates. } }); } else { //First check whether there is an exact match if (CollectionUtils.exists(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return (((EmployeeBO) o).getEmployeeName().toLowerCase() .contains(searchString.trim().toLowerCase())); //To change body of implemented methods use File | Settings | File Templates. 
} })) { CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return (((EmployeeBO) o).getEmployeeName().toLowerCase() .contains(searchString.trim().toLowerCase())); } }); } else { ArrayList<String> matchedEmployeeIds = employeeBL.getLuceneUtil() .getBestMatchEmployeeName(searchString.trim().toLowerCase()); allEmployees = new ArrayList<EmployeeBO>(); for (String id : matchedEmployeeIds) { allEmployees.add(employeeBL.getAllEmployees().get(Long.valueOf(id))); } } } /*{ CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { if (((EmployeeBO) o).getEmployeeName().toLowerCase().contains(searchString.trim().toLowerCase())) return true; else if(new StringEncoderComparator(new Soundex()).compare(((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new DoubleMetaphone()).compare(((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0) { return true; } else { for (String empNameParts : ((EmployeeBO) o).getEmployeeName().trim().split(" ")) { if (new StringEncoderComparator(new Soundex()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new DoubleMetaphone()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 // || new StringEncoderComparator(new Metaphone()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 // || new StringEncoderComparator(new RefinedSoundex()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 ) { return true; } } return false; } } }); } */ } //// Handle Order By if (sidx != null && !sidx.equals("")) { Collections.sort(allEmployees, new Comparator<EmployeeBO>() { public int compare(EmployeeBO e1, EmployeeBO e2) { if (sidx.equalsIgnoreCase("employeeName")) return sord.equalsIgnoreCase("asc") ? 
e1.getEmployeeName().compareTo(e2.getEmployeeName()) : e2.getEmployeeName().compareTo(e1.getEmployeeName()); else if (sidx.equalsIgnoreCase("jobTitle")) return sord.equalsIgnoreCase("asc") ? e1.getJobTitle().compareTo(e2.getJobTitle()) : e2.getJobTitle().compareTo(e1.getJobTitle()); else if (sidx.equalsIgnoreCase("manager")) return sord.equalsIgnoreCase("asc") ? e1.getManager().compareTo(e2.getManager()) : e2.getManager().compareTo(e1.getManager()); else return sord.equalsIgnoreCase("asc") ? e1.getEmployeeName().compareTo(e2.getEmployeeName()) : e2.getEmployeeName().compareTo(e1.getEmployeeName()); } }); } // records = allEmployees.size(); total = (int) Math.ceil((double) records / (double) rows); gridModel = new ArrayList<EmployeeBO>(); to = to > records ? records : to; for (int iCounter = from; iCounter < to; iCounter++) { EmployeeBO employeeBO = allEmployees.get(iCounter); //new EmployeeBO((Employee) employeeLinkedHashMap.values().toArray()[iCounter]); gridModel.add(employeeBO); } } catch (Exception ex) { throw new ExceptionWrapper(ex); } return SUCCESS; }
From source file:at.jps.sanction.core.util.TokenTool.java
public static float compareCheckDoubleMetaphone(final String text1, final String text2, final boolean fuzzy, final int minlen, final double fuzzyValue) { final DoubleMetaphone encoder = new DoubleMetaphone(); // TODO: in reallife // make/*from ww w . j av a2 s . com*/ // this go away !! return (compareCheck(encoder.doubleMetaphone(text1), encoder.doubleMetaphone(text2), fuzzy, minlen, fuzzyValue)); }
From source file:edu.mayo.informatics.cts.CTSVAPI.lucene.LuceneSearch.java
private void init(String indexLocation) throws UnexpectedError { try {/*from www . ja v a 2 s . c o m*/ service_ = new IndexerService(indexLocation, false); indexSearchers_ = new Hashtable(); codeSystemToIndexMap_ = new Hashtable(); if (CTSConstants.LUCENE_SEARCH_ENABLED.getValue()) { WhiteSpaceLowerCaseAnalyzer wslca = new WhiteSpaceLowerCaseAnalyzer(new String[] {}, WhiteSpaceLowerCaseAnalyzer.getDefaultCharRemovalSet(), WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet()); extraWhiteSpaceChars_ = wslca.getCurrentCharRemovalTable(); // Use a FieldSkippingAnalyzer, so it doesn't tokenize on the non-tokenized fields. PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new FieldSkippingAnalyzer( new String[] { "codingSchemeName", "conceptCode", "isActive", "isPreferred", "presentationFormat", "language", "conceptStatus", "propertyId", "dataType", "degreeOfFidelity", "representationalForm", "matchIfNoContext", "property" }, wslca)); if (CTSConstants.LUCENE_DOUBLE_METAPHONE_SEARCH_ENABLED.getValue()) { EncoderAnalyzer temp = new EncoderAnalyzer(new DoubleMetaphone(), new String[] {}, WhiteSpaceLowerCaseAnalyzer.getDefaultCharRemovalSet(), WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet()); analyzer.addAnalyzer("dm_propertyValue", temp); } if (CTSConstants.LUCENE_STEMMED_SEARCH_ENABLED.getValue()) { SnowballAnalyzer sa = new SnowballAnalyzer(false, "English", new String[] {}, WhiteSpaceLowerCaseAnalyzer.getDefaultCharRemovalSet(), WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet()); analyzer.addAnalyzer("stem_propertyValue", sa); } // LVG Norm searching has been retired. 
// if (CTSConstants.LUCENE_NORM_SEARCH_ENABLED.getValue()) // { // try // { // NormAnalyzer temp = new NormAnalyzer(false, new String[]{}, WhiteSpaceLowerCaseAnalyzer // .getDefaultCharRemovalSet(), WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet()); // // indexerService_.createIndex(normIndexName_, temp); // analyzer.addAnalyzer("norm_propertyValue", temp); // } // catch (NoClassDefFoundError e) // { // // norm is not available // CTSConstants.LUCENE_NORM_SEARCH_ENABLED.setValue(false); // logger.error("LuceneNormSearch could not be initialized. Is Norm (lvg) on the classpath?", e); // } // } parser_ = new QueryParser("propertyValue", analyzer); } else { logger.error("Tried to init a Lucene searcher when lucene search is not enabled"); throw new UnexpectedError("Lucene search functionality is not enabled in the configuration files."); } } catch (UnexpectedError e) { throw e; } catch (InternalErrorException e) { throw new UnexpectedError("There was a problem opening the lucene index."); } }
From source file:com.puppetlabs.geppetto.pp.dsl.contentassist.PPProposalsGenerator.java
/** * Attempts to produce a list of names that are close to the given name. At most 5 proposals * are generated. The returned proposals are made in order of "pronunciation distance" which is * obtained by taking the Levenshtein distance between the Double Monophone encodings of * candidate and given name. Candidates are selected as the names with shortest Levenshtein distance * and names that are Monophonically equal, or starts or ends monophonically. * // ww w . ja va 2 s.c o m * @param currentName * the name for which proposals are to be generated * @param descs * the descriptors of available named values * @param searchPath * TODO * @param types * if stated, the wanted types of named values * @return * array of proposals, possibly empty, but never null. */ public String[] computeProposals(final String currentName, Collection<IEObjectDescription> descs, boolean upperCaseProposals, PPSearchPath searchPath, EClass... types) { if (currentName == null || currentName.length() < 1) return new String[0]; // compute the 5 best matches and only accept if score <= 5 ScoreKeeper<IEObjectDescription> tracker = new ScoreKeeper<IEObjectDescription>(5, false, 5); // List<IEObjectDescription> metaphoneAlike = Lists.newArrayList(); final DoubleMetaphone encoder = new DoubleMetaphone(); final String metaphoneName = encoder.encode(currentName); for (IEObjectDescription d : descs) { EClass c = d.getEClass(); typeok: if (types != null && types.length > 0) { for (EClass wanted : types) if ((wanted == c || wanted.isSuperTypeOf(c))) break typeok; continue; } // filter based on path visibility if (searchPath.searchIndexOf(d) == -1) continue; // not visible according to path String candidateName = converter.toString(d.getName()); tracker.addScore(StringUtils.getLevenshteinDistance(currentName, candidateName), d); String candidateMetaphone = encoder.encode(candidateName); // metaphone matches are scored on the pronounciation distance if (metaphoneName.equals(candidateMetaphone) // || 
candidateMetaphone.startsWith(metaphoneName) // || candidateMetaphone.endsWith(metaphoneName) // ) tracker.addScore(StringUtils.getLevenshteinDistance(metaphoneName, candidateMetaphone), d); // System.err.printf("Metaphone alike: %s == %s\n", currentName, candidateName); } List<String> result = Lists.newArrayList(); // System.err.print("Scores = "); for (ScoreEntry<IEObjectDescription> entry : tracker.getScoreEntries()) { String s = converter.toString(entry.getData().getName()); result.add(s); // System.err.printf("%d %s, ", entry.getScore(), s); } // System.err.println(); String[] proposals = result.toArray(new String[result.size()]); PronunciationComparator x = new PronunciationComparator(encoder, metaphoneName); Arrays.sort(proposals, x); // System.err.print("Order = "); // for(int i = 0; i < proposals.length; i++) // System.err.printf("%s, ", proposals[i]); // System.err.println(); return upperCaseProposals ? toUpperCaseProposals(proposals) : proposals; }
From source file:it.univpm.deit.semedia.musicuri.core.Toolset.java
/**
 * Generates the metaphone equivalents of the words in the given list.
 * The terms are produced with the double metaphone phonetic matching algorithm (apache implementation).
 *
 * @param keywords an ArrayList containing the keywords (as Strings) to generate metaphones for
 * @return an ArrayList containing the generated metaphone term for each keyword, in the same order
 */
public static ArrayList GenerateMetaphones(ArrayList keywords) {
    ArrayList encodedTerms = new ArrayList(keywords.size());
    DoubleMetaphone encoder = new DoubleMetaphone();
    for (Object keyword : keywords) {
        // The cast selects the String overload of encode, as the indexed loop did before.
        encodedTerms.add(encoder.encode((String) keyword));
    }
    return encodedTerms;
}
From source file:org.activityinfo.server.endpoint.refine.ReconciliationServiceTest.java
/**
 * Prints the Double Metaphone codes of a few sample names: the primary code of "Tin-E"
 * and the alternate code of "Youwarou" on one line, then the primary code of "Youvarou".
 */
@Test
public void doubleMetaphone() {
    DoubleMetaphone phonetic = new DoubleMetaphone();

    String tinE = phonetic.doubleMetaphone("Tin-E");
    String youwarouAlternate = phonetic.doubleMetaphone("Youwarou", true);
    System.out.println(tinE + " " + youwarouAlternate);

    String youvarou = phonetic.doubleMetaphone("Youvarou");
    System.out.println(youvarou);
}
From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java
/**
 * Runs each phonetic encoder over a fixed input, once with the boolean flag set to true
 * and once with false, and checks the produced terms through {@code assertAlgorithm}.
 */
public void testAlgorithms() throws Exception {
    final String words = "aaa bbb ccc easgasg";
    final String names = "Darda Karleen Datha Carlene";

    assertAlgorithm(new Metaphone(), true, words,
            new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
    assertAlgorithm(new Metaphone(), false, words,
            new String[] { "A", "B", "KKK", "ESKS" });

    assertAlgorithm(new DoubleMetaphone(), true, words,
            new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
    assertAlgorithm(new DoubleMetaphone(), false, words,
            new String[] { "A", "PP", "KK", "ASKS" });

    assertAlgorithm(new Soundex(), true, words,
            new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
    assertAlgorithm(new Soundex(), false, words,
            new String[] { "A000", "B000", "C000", "E220" });

    assertAlgorithm(new RefinedSoundex(), true, words,
            new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
    assertAlgorithm(new RefinedSoundex(), false, words,
            new String[] { "A0", "B1", "C3", "E034034" });

    assertAlgorithm(new Caverphone2(), true, names,
            new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen", "TTA1111111", "Datha",
                    "KLN1111111", "Carlene" });
    assertAlgorithm(new Caverphone2(), false, names,
            new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

    assertAlgorithm(new Nysiis(), true, words,
            new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
    assertAlgorithm(new Nysiis(), false, words,
            new String[] { "A", "B", "C", "EASGAS" });
}
From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java
/** blast some random strings through the analyzer */ public void testRandomStrings() throws IOException { Encoder encoders[] = new Encoder[] { new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2() }; for (final Encoder e : encoders) { Analyzer a = new Analyzer() { @Override// w w w. j a va 2 s . c o m protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false)); } }; checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER); a.close(); Analyzer b = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false)); } }; checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER); b.close(); } }
From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java
/**
 * Checks, for every encoder, that an empty term run through a keyword tokenizer and a
 * PhoneticFilter (with a randomly chosen boolean flag) comes out as an empty term.
 */
public void testEmptyTerm() throws IOException {
    final Encoder[] phoneticEncoders = { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };

    for (final Encoder encoder : phoneticEncoders) {
        final Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                final Tokenizer source = new KeywordTokenizer();
                // Drawn after the tokenizer is created and before the filter, as before.
                final boolean flag = random().nextBoolean();
                return new TokenStreamComponents(source, new PhoneticFilter(source, encoder, flag));
            }
        };
        checkOneTerm(analyzer, "", "");
        analyzer.close();
    }
}