Example usages of the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()

public DoubleMetaphone() 

Source Link


Creates an instance of this DoubleMetaphone encoder


From source file:io.klerch.alexa.tellask.model.AlexaInput.java

/**
 * Checks if a slot is contained in the intent request and has a value which is a
 * phonetic sibling of the string given to this method. The Double Metaphone algorithm
 * is optimized for the English language and is used here to match the slot value against
 * the value given to this method.
 * @param slotName name of the slot to look for
 * @param value the value to compare the slot value with
 * @return True, if slot value and given value are phonetically equal with Double metaphone algorithm
 */
public boolean hasSlotIsDoubleMetaphoneEqual(final String slotName, final String value) {
    // The slot value is read up front; it is only compared once the guards below pass.
    final String slotValue = getSlotValue(slotName);
    // && short-circuits: the phonetic comparison runs only when hasSlotNotBlank(slotName)
    // is true and the caller-supplied value is non-null.
    return hasSlotNotBlank(slotName) && value != null
            && new DoubleMetaphone().isDoubleMetaphoneEqual(slotValue, value);

From source file:com.perceptive.epm.perkolcentral.action.ajax.EmployeeDetailsAction.java

/*
 * Snippet (truncated on the source page) that prepares the employee grid data.
 * Visible steps: log the paging and search parameters, load all employees from employeeBL,
 * filter them according to searchOper/searchString, sort them by sidx/sord, then compute
 * records and total and start filling gridModel for the requested page.
 * Any exception raised inside the try block is rethrown wrapped in ExceptionWrapper.
 */
public String executeGetAllEmployees() throws ExceptionWrapper {
    try {
        // NOTE(review): sndx and comparator1 are not referenced again in the visible part of this snippet.
        Soundex sndx = new Soundex();
        DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
        final StringEncoderComparator comparator1 = new StringEncoderComparator(doubleMetaphone);

        LoggingHelpUtil.printDebug("Page " + getPage() + " Rows " + getRows() + " Sorting Order " + getSord()
                + " Index Row :" + getSidx());
        LoggingHelpUtil.printDebug("Search :" + searchField + " " + searchOper + " " + searchString);

        // Calculate the exclusive end index of the requested page
        int to = (rows * page);

        // Calculate the first row to read
        int from = to - rows;
        // NOTE(review): this empty map is replaced by the getAllEmployees() result just below.
        LinkedHashMap<Long, EmployeeBO> employeeLinkedHashMap = new LinkedHashMap<Long, EmployeeBO>();

        employeeLinkedHashMap = employeeBL.getAllEmployees();
        ArrayList<EmployeeBO> allEmployees = new ArrayList<EmployeeBO>(employeeLinkedHashMap.values());
        //Handle search
        // Filtering only happens when both the operator and the search string are non-blank.
        if (searchOper != null && !searchOper.trim().equalsIgnoreCase("") && searchString != null
                && !searchString.trim().equalsIgnoreCase("")) {
            // "eq": keep employees whose name equals the trimmed search string, ignoring case.
            if (searchOper.trim().equalsIgnoreCase("eq")) {
                CollectionUtils.filter(allEmployees, new Predicate() {
                    public boolean evaluate(Object o) {
                        return ((EmployeeBO) o).getEmployeeName().equalsIgnoreCase(searchString.trim()); //To change body of implemented methods use File | Settings | File Templates.
            // "slk": keep employees whose lower-cased name compares equal (== 0) to the lower-cased
            // search string under at least one of Soundex, DoubleMetaphone, Metaphone or RefinedSoundex.
            } else if (searchOper.trim().equalsIgnoreCase("slk")) {
                CollectionUtils.filter(allEmployees, new Predicate() {
                    public boolean evaluate(Object o) {
                        return (new StringEncoderComparator(new Soundex()).compare(
                                ((EmployeeBO) o).getEmployeeName().toLowerCase(),
                                searchString.trim().toLowerCase()) == 0
                                || new StringEncoderComparator(new DoubleMetaphone()).compare(
                                        ((EmployeeBO) o).getEmployeeName().toLowerCase(),
                                        searchString.trim().toLowerCase()) == 0
                                || new StringEncoderComparator(new Metaphone()).compare(
                                        ((EmployeeBO) o).getEmployeeName().toLowerCase(),
                                        searchString.trim().toLowerCase()) == 0
                                || new StringEncoderComparator(new RefinedSoundex()).compare(
                                        ((EmployeeBO) o).getEmployeeName().toLowerCase(),
                                        searchString.trim().toLowerCase()) == 0); //To change body of implemented methods use File | Settings | File Templates.
            } else {
                // First check whether any employee name contains the search string (case-insensitive)
                if (CollectionUtils.exists(allEmployees, new Predicate() {
                    public boolean evaluate(Object o) {
                        return (((EmployeeBO) o).getEmployeeName().toLowerCase()
                                .contains(searchString.trim().toLowerCase())); //To change body of implemented methods use File | Settings | File Templates.
                })) {
                    CollectionUtils.filter(allEmployees, new Predicate() {
                        public boolean evaluate(Object o) {
                            return (((EmployeeBO) o).getEmployeeName().toLowerCase()
                // No substring match: the list is rebuilt from ids obtained via employeeBL.getLuceneUtil()
                // (the rest of that call is cut off in this snippet).
                } else {
                    ArrayList<String> matchedEmployeeIds = employeeBL.getLuceneUtil()
                    allEmployees = new ArrayList<EmployeeBO>();
                    for (String id : matchedEmployeeIds) {

            // NOTE(review): the lines from here down to the block-comment terminator appear to be the
            // tail of a commented-out block whose opening marker is missing from this snippet - confirm.
            CollectionUtils.filter(allEmployees, new Predicate() {
                public boolean evaluate(Object o) {
                    if (((EmployeeBO) o).getEmployeeName().toLowerCase().contains(searchString.trim().toLowerCase()))
                        return true;
                    else if(new StringEncoderComparator(new Soundex()).compare(((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0
                            || new StringEncoderComparator(new DoubleMetaphone()).compare(((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0)
                        return true;
                    else {
                        for (String empNameParts : ((EmployeeBO) o).getEmployeeName().trim().split(" ")) {
                            if (new StringEncoderComparator(new Soundex()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0
                                    || new StringEncoderComparator(new DoubleMetaphone()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0
                                //    || new StringEncoderComparator(new Metaphone()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0
                                //    || new StringEncoderComparator(new RefinedSoundex()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0
                                    ) {
                                return true;
                        return false;
            } */
        //// Handle Order By
        // Sorting is applied only when a sort column (sidx) was supplied.
        if (sidx != null && !sidx.equals("")) {

            Collections.sort(allEmployees, new Comparator<EmployeeBO>() {
                public int compare(EmployeeBO e1, EmployeeBO e2) {
                    // For every column, "asc" compares e1 to e2; any other sord value reverses the operands.
                    if (sidx.equalsIgnoreCase("employeeName"))
                        return sord.equalsIgnoreCase("asc")
                                ? e1.getEmployeeName().compareTo(e2.getEmployeeName())
                                : e2.getEmployeeName().compareTo(e1.getEmployeeName());
                    else if (sidx.equalsIgnoreCase("jobTitle"))
                        return sord.equalsIgnoreCase("asc") ? e1.getJobTitle().compareTo(e2.getJobTitle())
                                : e2.getJobTitle().compareTo(e1.getJobTitle());
                    else if (sidx.equalsIgnoreCase("manager"))
                        return sord.equalsIgnoreCase("asc") ? e1.getManager().compareTo(e2.getManager())
                                : e2.getManager().compareTo(e1.getManager());
                        // Reached when sidx matches none of the columns above: order by employeeName.
                        return sord.equalsIgnoreCase("asc")
                                ? e1.getEmployeeName().compareTo(e2.getEmployeeName())
                                : e2.getEmployeeName().compareTo(e1.getEmployeeName());


        // records = number of employees left after filtering; total = number of pages, rounded up.
        records = allEmployees.size();
        total = (int) Math.ceil((double) records / (double) rows);

        gridModel = new ArrayList<EmployeeBO>();
        // Clamp the end index so the loop below never reads past the end of the list.
        to = to > records ? records : to;
        for (int iCounter = from; iCounter < to; iCounter++) {
            EmployeeBO employeeBO = allEmployees.get(iCounter);
            //new EmployeeBO((Employee) employeeLinkedHashMap.values().toArray()[iCounter]);

    } catch (Exception ex) {
        // Every failure is wrapped so that callers only have to handle ExceptionWrapper.
        throw new ExceptionWrapper(ex);

    return SUCCESS;

From source file:at.jps.sanction.core.util.TokenTool.java

/*
 * Encodes both texts with DoubleMetaphone#doubleMetaphone and passes the two encodings,
 * together with fuzzy and minlen, to compareCheck. The remainder of the compareCheck
 * argument list is cut off in this snippet.
 */
public static float compareCheckDoubleMetaphone(final String text1, final String text2, final boolean fuzzy,
        final int minlen, final double fuzzyValue) {

    // A new encoder is created on every call.
    final DoubleMetaphone encoder = new DoubleMetaphone(); // TODO: in real life
    // make
    // this go away !!

    return (compareCheck(encoder.doubleMetaphone(text1), encoder.doubleMetaphone(text2), fuzzy, minlen,


From source file:edu.mayo.informatics.cts.CTSVAPI.lucene.LuceneSearch.java

/*
 * Snippet (truncated on the source page) that sets up the Lucene search state for the given
 * index location: creates the IndexerService and the lookup tables and, when Lucene search is
 * enabled in CTSConstants, builds a per-field analyzer and the QueryParser for "propertyValue".
 * When Lucene search is disabled it logs an error and throws UnexpectedError.
 */
private void init(String indexLocation) throws UnexpectedError {
    try {
        service_ = new IndexerService(indexLocation, false);
        indexSearchers_ = new Hashtable();
        codeSystemToIndexMap_ = new Hashtable();

        if (CTSConstants.LUCENE_SEARCH_ENABLED.getValue()) {
            // The remaining constructor arguments are cut off in this snippet.
            WhiteSpaceLowerCaseAnalyzer wslca = new WhiteSpaceLowerCaseAnalyzer(new String[] {},

            extraWhiteSpaceChars_ = wslca.getCurrentCharRemovalTable();

            // Use a FieldSkippingAnalyzer, so it doesn't tokenize on the non-tokenized fields.
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new FieldSkippingAnalyzer(
                    new String[] { "codingSchemeName", "conceptCode", "isActive", "isPreferred",
                            "presentationFormat", "language", "conceptStatus", "propertyId", "dataType",
                            "degreeOfFidelity", "representationalForm", "matchIfNoContext", "property" },

            // Optional phonetic search: the "dm_propertyValue" field is analyzed with a DoubleMetaphone encoder.
            if (CTSConstants.LUCENE_DOUBLE_METAPHONE_SEARCH_ENABLED.getValue()) {
                EncoderAnalyzer temp = new EncoderAnalyzer(new DoubleMetaphone(), new String[] {},
                analyzer.addAnalyzer("dm_propertyValue", temp);

            // Optional stemmed search: the "stem_propertyValue" field is analyzed with an English SnowballAnalyzer.
            if (CTSConstants.LUCENE_STEMMED_SEARCH_ENABLED.getValue()) {
                SnowballAnalyzer sa = new SnowballAnalyzer(false, "English", new String[] {},
                analyzer.addAnalyzer("stem_propertyValue", sa);

            // LVG Norm searching has been retired.
            // if (CTSConstants.LUCENE_NORM_SEARCH_ENABLED.getValue())
            //             {
            //                 try
            //                 {
            //                     NormAnalyzer temp = new NormAnalyzer(false, new String[]{}, WhiteSpaceLowerCaseAnalyzer
            //                             .getDefaultCharRemovalSet(), WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet());
            //                     // indexerService_.createIndex(normIndexName_, temp);
            //                     analyzer.addAnalyzer("norm_propertyValue", temp);
            //                 }
            //                 catch (NoClassDefFoundError e)
            //                 {
            //                     // norm is not available
            //                     CTSConstants.LUCENE_NORM_SEARCH_ENABLED.setValue(false);
            //                     logger.error("LuceneNormSearch could not be initialized.  Is Norm (lvg) on the classpath?", e);
            //                 }
            //             }

            // Queries default to the "propertyValue" field and use the per-field analyzer built above.
            parser_ = new QueryParser("propertyValue", analyzer);

        } else {
            logger.error("Tried to init a Lucene searcher when lucene search is not enabled");
            throw new UnexpectedError("Lucene search functionality is not enabled in the configuration files.");
    } catch (UnexpectedError e) {
        // Already the declared exception type: pass it through unchanged.
        throw e;
    } catch (InternalErrorException e) {
        // NOTE(review): the caught exception is not attached to the new UnexpectedError.
        throw new UnexpectedError("There was a problem opening the lucene index.");

From source file:com.puppetlabs.geppetto.pp.dsl.contentassist.PPProposalsGenerator.java

/**
 * Attempts to produce a list of names that are close to the given name. At most 5 proposals
 * are generated. The returned proposals are made in order of "pronunciation distance" which is
 * obtained by taking the Levenshtein distance between the Double Metaphone encodings of
 * candidate and given name. Candidates are selected as the names with shortest Levenshtein distance
 * and names whose Metaphone encoding equals, starts with, or ends with the encoding of the given name.
 *
 * @param currentName
 *            the name for which proposals are to be generated
 * @param descs
 *            the descriptors of available named values
 * @param upperCaseProposals
 *            if true, the proposals are converted with toUpperCaseProposals before being returned
 * @param searchPath
 *            TODO
 * @param types
 *            if stated, the wanted types of named values
 * @return
 *         array of proposals, possibly empty, but never null.
 */
public String[] computeProposals(final String currentName, Collection<IEObjectDescription> descs,
        boolean upperCaseProposals, PPSearchPath searchPath, EClass... types) {
    // Nothing to propose for a null or empty name.
    if (currentName == null || currentName.length() < 1)
        return new String[0];

    // compute the 5 best matches and only accept if score <= 5
    ScoreKeeper<IEObjectDescription> tracker = new ScoreKeeper<IEObjectDescription>(5, false, 5);
    // List<IEObjectDescription> metaphoneAlike = Lists.newArrayList();
    // One encoder is shared by the candidate loop and the final PronunciationComparator.
    final DoubleMetaphone encoder = new DoubleMetaphone();
    final String metaphoneName = encoder.encode(currentName);

    for (IEObjectDescription d : descs) {
        EClass c = d.getEClass();
        // Labelled block: "break typeok" leaves the type check as soon as one wanted type matches.
        // (What happens when no type matches is cut off in this snippet.)
        typeok: if (types != null && types.length > 0) {
            for (EClass wanted : types)
                if ((wanted == c || wanted.isSuperTypeOf(c)))
                    break typeok;
        // filter based on path visibility
        if (searchPath.searchIndexOf(d) == -1)
            continue; // not visible according to path

        // Every visible candidate is scored on the plain Levenshtein distance between the names.
        String candidateName = converter.toString(d.getName());
        tracker.addScore(StringUtils.getLevenshteinDistance(currentName, candidateName), d);
        String candidateMetaphone = encoder.encode(candidateName);
        // metaphone matches are scored on the pronunciation distance
        if (metaphoneName.equals(candidateMetaphone) //
                || candidateMetaphone.startsWith(metaphoneName) //
                || candidateMetaphone.endsWith(metaphoneName) //
            tracker.addScore(StringUtils.getLevenshteinDistance(metaphoneName, candidateMetaphone), d);
        // System.err.printf("Metaphone alike: %s == %s\n", currentName, candidateName);
    List<String> result = Lists.newArrayList();
    // System.err.print("Scores = ");
    for (ScoreEntry<IEObjectDescription> entry : tracker.getScoreEntries()) {
        String s = converter.toString(entry.getData().getName());
        // System.err.printf("%d %s, ", entry.getScore(), s);
    // System.err.println();

    String[] proposals = result.toArray(new String[result.size()]);

    // Order the proposals with a PronunciationComparator built from the encoder and the encoded name.
    PronunciationComparator x = new PronunciationComparator(encoder, metaphoneName);

    Arrays.sort(proposals, x);
    // System.err.print("Order = ");
    // for(int i = 0; i < proposals.length; i++)
    // System.err.printf("%s, ", proposals[i]);
    // System.err.println();
    return upperCaseProposals ? toUpperCaseProposals(proposals) : proposals;

From source file:it.univpm.deit.semedia.musicuri.core.Toolset.java

/**
 * Generates a list of terms that are the metaphone equivalents of the words in the given list.
 * The terms are generated using the double metaphone phonetic matching algorithm (Apache implementation).
 * @param keywords an ArrayList object containing the keywords to generate metaphones for
 * @return an ArrayList object containing the generated metaphone equivalent terms
 */
public static ArrayList GenerateMetaphones(ArrayList keywords) {
    // Result list is pre-sized to the number of keywords.
    ArrayList metaphoneList = new ArrayList(keywords.size());
    DoubleMetaphone meta = new DoubleMetaphone();
    String tmp = null;

    // Each keyword is cast to String and encoded; the rest of the loop body is cut off in this snippet.
    for (int i = 0; i < keywords.size(); i++) {
        tmp = meta.encode((String) keywords.get(i));
    return metaphoneList;

From source file:org.activityinfo.server.endpoint.refine.ReconciliationServiceTest.java

// Prints the Double Metaphone encodings of two sample strings; the snippet shows no assertions.
public void doubleMetaphone() {

    DoubleMetaphone encoder = new DoubleMetaphone();
    // The second call passes true as the boolean argument, which per the Commons Codec API
    // selects the alternate encoding.
    System.out.println(encoder.doubleMetaphone("Tin-E") + " " + encoder.doubleMetaphone("Youwarou", true));


From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/*
 * Runs assertAlgorithm for each phonetic encoder twice, once with the boolean flag set to true
 * and once with false. In the expected arrays below, the "true" runs list every phonetic code
 * followed by the original token, while the "false" runs list only the phonetic codes.
 */
public void testAlgorithms() throws Exception {
    assertAlgorithm(new Metaphone(), true, "aaa bbb ccc easgasg",
            new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
    assertAlgorithm(new Metaphone(), false, "aaa bbb ccc easgasg", new String[] { "A", "B", "KKK", "ESKS" });

    assertAlgorithm(new DoubleMetaphone(), true, "aaa bbb ccc easgasg",
            new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
    assertAlgorithm(new DoubleMetaphone(), false, "aaa bbb ccc easgasg",
            new String[] { "A", "PP", "KK", "ASKS" });

    assertAlgorithm(new Soundex(), true, "aaa bbb ccc easgasg",
            new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
    assertAlgorithm(new Soundex(), false, "aaa bbb ccc easgasg",
            new String[] { "A000", "B000", "C000", "E220" });

    assertAlgorithm(new RefinedSoundex(), true, "aaa bbb ccc easgasg",
            new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
    assertAlgorithm(new RefinedSoundex(), false, "aaa bbb ccc easgasg",
            new String[] { "A0", "B1", "C3", "E034034" });

    // Caverphone2 is exercised with name-like input; the expected codes show "Darda"/"Datha" and
    // "Karleen"/"Carlene" mapping to the same code.
    assertAlgorithm(new Caverphone2(), true, "Darda Karleen Datha Carlene", new String[] { "TTA1111111",
            "Darda", "KLN1111111", "Karleen", "TTA1111111", "Datha", "KLN1111111", "Carlene" });
    assertAlgorithm(new Caverphone2(), false, "Darda Karleen Datha Carlene",
            new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

    assertAlgorithm(new Nysiis(), true, "aaa bbb ccc easgasg",
            new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
    assertAlgorithm(new Nysiis(), false, "aaa bbb ccc easgasg", new String[] { "A", "B", "C", "EASGAS" });

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/**
 * Blasts some random strings through analyzers built on each phonetic encoder.
 * For every encoder, two analyzers (a whitespace MockTokenizer followed by a PhoneticFilter)
 * are created and each is passed to checkRandomData with 1000 * RANDOM_MULTIPLIER.
 */
public void testRandomStrings() throws IOException {
    Encoder encoders[] = new Encoder[] { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };

    for (final Encoder e : encoders) {
        Analyzer a = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));

        checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);

        // NOTE(review): in the visible code this analyzer is configured exactly like the one above.
        Analyzer b = new Analyzer() {
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));

        checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

public void testEmptyTerm() throws IOException {
    Encoder encoders[] = new Encoder[] { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };
    for (final Encoder e : encoders) {
        Analyzer a = new Analyzer() {
            @Override/*  w  ww.j  av  a2  s  . c om*/
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new KeywordTokenizer();
                return new TokenStreamComponents(tokenizer,
                        new PhoneticFilter(tokenizer, e, random().nextBoolean()));
        checkOneTerm(a, "", "");