Example usage for org.apache.commons.codec.language DoubleMetaphone DoubleMetaphone

List of usage examples for org.apache.commons.codec.language DoubleMetaphone DoubleMetaphone

Introduction

On this page you can find example usages of the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone().

Prototype

public DoubleMetaphone() 

Source Link

Document

Creates an instance of this DoubleMetaphone encoder

Usage

From source file:io.klerch.alexa.tellask.model.AlexaInput.java

/**
 * Tells whether the given slot carries a non-blank value that is a phonetic sibling of
 * the supplied text. The comparison uses the Double Metaphone algorithm, which is
 * optimized for the English language.
 *
 * @param slotName name of the slot to look after
 * @param value the text the slot value is compared against
 * @return True, if the slot is not blank, {@code value} is not null and both are
 *         phonetically equal according to the Double Metaphone algorithm
 */
public boolean hasSlotIsDoubleMetaphoneEqual(final String slotName, final String value) {
    final String slotValue = getSlotValue(slotName);
    if (!hasSlotNotBlank(slotName) || value == null) {
        return false;
    }
    final DoubleMetaphone encoder = new DoubleMetaphone();
    return encoder.isDoubleMetaphoneEqual(slotValue, value);
}

From source file:com.perceptive.epm.perkolcentral.action.ajax.EmployeeDetailsAction.java

/**
 * Loads all employees, applies the optional search filter and sort order requested by
 * the grid, and stores the requested page of results in {@code gridModel}. Also updates
 * {@code records} (number of employees after filtering) and {@code total} (page count).
 *
 * <p>Search operators handled:
 * <ul>
 *   <li>{@code eq} - case-insensitive equality on the employee name;</li>
 *   <li>{@code slk} - "sounds like": a match under any of Soundex, Double Metaphone,
 *       Metaphone or Refined Soundex;</li>
 *   <li>anything else - case-insensitive substring match, falling back to the Lucene
 *       best-match lookup when no employee name contains the search string.</li>
 * </ul>
 *
 * @return {@code SUCCESS}
 * @throws ExceptionWrapper wrapping any exception raised while building the result
 */
public String executeGetAllEmployees() throws ExceptionWrapper {
    try {
        LoggingHelpUtil.printDebug("Page " + getPage() + " Rows " + getRows() + " Sorting Order " + getSord()
                + " Index Row :" + getSidx());
        LoggingHelpUtil.printDebug("Search :" + searchField + " " + searchOper + " " + searchString);

        // Exclusive end index of the requested page
        int to = (rows * page);

        // Index of the first row to read
        int from = to - rows;

        final LinkedHashMap<Long, EmployeeBO> employeeLinkedHashMap = employeeBL.getAllEmployees();
        ArrayList<EmployeeBO> allEmployees = new ArrayList<EmployeeBO>(employeeLinkedHashMap.values());

        // Handle search
        if (searchOper != null && !searchOper.trim().equalsIgnoreCase("") && searchString != null
                && !searchString.trim().equalsIgnoreCase("")) {
            final String operator = searchOper.trim();
            final String term = searchString.trim();
            final String termLower = term.toLowerCase();

            if (operator.equalsIgnoreCase("eq")) {
                CollectionUtils.filter(allEmployees, new Predicate() {
                    @Override
                    public boolean evaluate(Object o) {
                        return ((EmployeeBO) o).getEmployeeName().equalsIgnoreCase(term);
                    }
                });
            } else if (operator.equalsIgnoreCase("slk")) {
                // Built once per search rather than once per evaluated employee.
                // Order matters only for short-circuiting: the first match wins.
                final StringEncoderComparator[] phoneticComparators = {
                        new StringEncoderComparator(new Soundex()),
                        new StringEncoderComparator(new DoubleMetaphone()),
                        new StringEncoderComparator(new Metaphone()),
                        new StringEncoderComparator(new RefinedSoundex()) };
                CollectionUtils.filter(allEmployees, new Predicate() {
                    @Override
                    public boolean evaluate(Object o) {
                        final String nameLower = ((EmployeeBO) o).getEmployeeName().toLowerCase();
                        for (StringEncoderComparator phonetic : phoneticComparators) {
                            if (phonetic.compare(nameLower, termLower) == 0) {
                                return true;
                            }
                        }
                        return false;
                    }
                });
            } else {
                final Predicate nameContainsTerm = new Predicate() {
                    @Override
                    public boolean evaluate(Object o) {
                        return ((EmployeeBO) o).getEmployeeName().toLowerCase().contains(termLower);
                    }
                };
                // Prefer substring matches; only consult Lucene when there are none
                if (CollectionUtils.exists(allEmployees, nameContainsTerm)) {
                    CollectionUtils.filter(allEmployees, nameContainsTerm);
                } else {
                    ArrayList<String> matchedEmployeeIds = employeeBL.getLuceneUtil()
                            .getBestMatchEmployeeName(termLower);
                    allEmployees = new ArrayList<EmployeeBO>();
                    for (String id : matchedEmployeeIds) {
                        // Reuses the map fetched above instead of calling getAllEmployees() per id.
                        // NOTE(review): an id missing from the map still adds null, as before - confirm
                        // whether such ids can occur.
                        allEmployees.add(employeeLinkedHashMap.get(Long.valueOf(id)));
                    }
                }
            }
        }

        // Handle Order By
        if (sidx != null && !sidx.equals("")) {
            Collections.sort(allEmployees, new Comparator<EmployeeBO>() {
                @Override
                public int compare(EmployeeBO e1, EmployeeBO e2) {
                    // Any order other than "asc" sorts descending, obtained by swapping the operands
                    final boolean ascending = sord.equalsIgnoreCase("asc");
                    final EmployeeBO left = ascending ? e1 : e2;
                    final EmployeeBO right = ascending ? e2 : e1;

                    if (sidx.equalsIgnoreCase("jobTitle")) {
                        return left.getJobTitle().compareTo(right.getJobTitle());
                    }
                    if (sidx.equalsIgnoreCase("manager")) {
                        return left.getManager().compareTo(right.getManager());
                    }
                    // "employeeName" and any unrecognised column sort by employee name
                    return left.getEmployeeName().compareTo(right.getEmployeeName());
                }
            });
        }

        records = allEmployees.size();
        total = (int) Math.ceil((double) records / (double) rows);

        // Copy the requested page, clamping the end index to the number of records
        gridModel = new ArrayList<EmployeeBO>();
        to = to > records ? records : to;
        for (int iCounter = from; iCounter < to; iCounter++) {
            gridModel.add(allEmployees.get(iCounter));
        }

    } catch (Exception ex) {
        throw new ExceptionWrapper(ex);
    }
    return SUCCESS;
}

From source file:at.jps.sanction.core.util.TokenTool.java

/**
 * Encodes both texts with Double Metaphone and passes the two codes, together with the
 * remaining arguments unchanged, to {@code compareCheck}.
 *
 * @param text1 first text to encode
 * @param text2 second text to encode
 * @param fuzzy forwarded to {@code compareCheck}
 * @param minlen forwarded to {@code compareCheck}
 * @param fuzzyValue forwarded to {@code compareCheck}
 * @return the result of {@code compareCheck} on the two encoded texts
 */
public static float compareCheckDoubleMetaphone(final String text1, final String text2, final boolean fuzzy,
        final int minlen, final double fuzzyValue) {

    // TODO: in real life make this per-call encoder allocation go away !!
    final DoubleMetaphone encoder = new DoubleMetaphone();

    final String code1 = encoder.doubleMetaphone(text1);
    final String code2 = encoder.doubleMetaphone(text2);

    return compareCheck(code1, code2, fuzzy, minlen, fuzzyValue);

}

From source file:edu.mayo.informatics.cts.CTSVAPI.lucene.LuceneSearch.java

/**
 * Creates the indexer service for the given location and, when Lucene search is enabled,
 * builds the per-field analyzer and the query parser for the "propertyValue" field.
 *
 * @param indexLocation location passed to the {@code IndexerService}
 * @throws UnexpectedError if Lucene search is not enabled in the configuration, or if an
 *         {@code InternalErrorException} is raised during setup
 */
private void init(String indexLocation) throws UnexpectedError {
    try {
        service_ = new IndexerService(indexLocation, false);
        indexSearchers_ = new Hashtable();
        codeSystemToIndexMap_ = new Hashtable();

        if (!CTSConstants.LUCENE_SEARCH_ENABLED.getValue()) {
            logger.error("Tried to init a Lucene searcher when lucene search is not enabled");
            throw new UnexpectedError("Lucene search functionality is not enabled in the configuration files.");
        }

        WhiteSpaceLowerCaseAnalyzer baseAnalyzer = new WhiteSpaceLowerCaseAnalyzer(new String[] {},
                WhiteSpaceLowerCaseAnalyzer.getDefaultCharRemovalSet(),
                WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet());

        extraWhiteSpaceChars_ = baseAnalyzer.getCurrentCharRemovalTable();

        // Fields that must not be tokenized; the FieldSkippingAnalyzer leaves them alone.
        final String[] untokenizedFields = { "codingSchemeName", "conceptCode", "isActive", "isPreferred",
                "presentationFormat", "language", "conceptStatus", "propertyId", "dataType",
                "degreeOfFidelity", "representationalForm", "matchIfNoContext", "property" };
        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
                new FieldSkippingAnalyzer(untokenizedFields, baseAnalyzer));

        if (CTSConstants.LUCENE_DOUBLE_METAPHONE_SEARCH_ENABLED.getValue()) {
            EncoderAnalyzer metaphoneAnalyzer = new EncoderAnalyzer(new DoubleMetaphone(), new String[] {},
                    WhiteSpaceLowerCaseAnalyzer.getDefaultCharRemovalSet(),
                    WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet());
            analyzer.addAnalyzer("dm_propertyValue", metaphoneAnalyzer);
        }

        if (CTSConstants.LUCENE_STEMMED_SEARCH_ENABLED.getValue()) {
            SnowballAnalyzer stemAnalyzer = new SnowballAnalyzer(false, "English", new String[] {},
                    WhiteSpaceLowerCaseAnalyzer.getDefaultCharRemovalSet(),
                    WhiteSpaceLowerCaseAnalyzer.getDefaultWhiteSpaceSet());
            analyzer.addAnalyzer("stem_propertyValue", stemAnalyzer);
        }

        // LVG Norm searching has been retired, so no "norm_propertyValue" analyzer is registered.

        parser_ = new QueryParser("propertyValue", analyzer);
    } catch (UnexpectedError e) {
        throw e;
    } catch (InternalErrorException e) {
        throw new UnexpectedError("There was a problem opening the lucene index.");
    }
}

From source file:com.puppetlabs.geppetto.pp.dsl.contentassist.PPProposalsGenerator.java

/**
 * Attempts to produce a list of names that are close to the given name. At most 5 proposals
 * are generated. The returned proposals are made in order of "pronunciation distance" which is
 * obtained by taking the Levenshtein distance between the Double Metaphone encodings of
 * candidate and given name. Candidates are selected as the names with shortest Levenshtein distance
 * and names that are metaphonically equal, or start or end metaphonically with the given name.
 *
 * @param currentName
 *            the name for which proposals are to be generated; a null, empty or
 *            whitespace-only name yields no proposals
 * @param descs
 *            the descriptors of available named values
 * @param upperCaseProposals
 *            if true, the proposals are passed through {@code toUpperCaseProposals} before
 *            being returned
 * @param searchPath
 *            descriptors for which {@code searchPath.searchIndexOf} returns -1 are skipped
 *            as not visible
 * @param types
 *            if stated, the wanted types of named values
 * @return
 *         array of proposals, possibly empty, but never null.
 */
public String[] computeProposals(final String currentName, Collection<IEObjectDescription> descs,
        boolean upperCaseProposals, PPSearchPath searchPath, EClass... types) {
    // The Double Metaphone encoder returns null for whitespace-only input, which would
    // otherwise surface as a NullPointerException further down; treat it like an empty name.
    if (currentName == null || currentName.trim().length() < 1)
        return new String[0];

    // compute the 5 best matches and only accept if score <= 5
    ScoreKeeper<IEObjectDescription> tracker = new ScoreKeeper<IEObjectDescription>(5, false, 5);
    final DoubleMetaphone encoder = new DoubleMetaphone();
    final String metaphoneName = encoder.encode(currentName);

    for (IEObjectDescription d : descs) {
        EClass c = d.getEClass();
        if (types != null && types.length > 0) {
            boolean typeMatches = false;
            for (EClass wanted : types) {
                if (wanted == c || wanted.isSuperTypeOf(c)) {
                    typeMatches = true;
                    break;
                }
            }
            if (!typeMatches)
                continue; // not one of the wanted types
        }
        // filter based on path visibility
        if (searchPath.searchIndexOf(d) == -1)
            continue; // not visible according to path

        String candidateName = converter.toString(d.getName());
        tracker.addScore(StringUtils.getLevenshteinDistance(currentName, candidateName), d);

        // A blank candidate name encodes to null; it cannot match phonetically, so it is
        // only scored on the plain name distance above.
        String candidateMetaphone = encoder.encode(candidateName);
        if (candidateMetaphone == null)
            continue;

        // metaphone matches are scored on the pronunciation distance
        if (metaphoneName.equals(candidateMetaphone) //
                || candidateMetaphone.startsWith(metaphoneName) //
                || candidateMetaphone.endsWith(metaphoneName) //
        )
            tracker.addScore(StringUtils.getLevenshteinDistance(metaphoneName, candidateMetaphone), d);
    }

    List<String> result = Lists.newArrayList();
    for (ScoreEntry<IEObjectDescription> entry : tracker.getScoreEntries())
        result.add(converter.toString(entry.getData().getName()));

    String[] proposals = result.toArray(new String[result.size()]);

    // order the proposals with the pronunciation comparator built around the given name
    Arrays.sort(proposals, new PronunciationComparator(encoder, metaphoneName));
    return upperCaseProposals ? toUpperCaseProposals(proposals) : proposals;
}

From source file:it.univpm.deit.semedia.musicuri.core.Toolset.java

/**
 * Generates a list of terms that are the metaphone equivalents of the words in the given list.
 * The terms are produced with the double metaphone phonetic matching algorithm (Apache implementation),
 * one output term per input keyword, in the same order.
 *
 * @param keywords an ArrayList containing the keywords (Strings) to generate metaphones for
 * @return an ArrayList containing the generated metaphone equivalent terms
 */
public static ArrayList GenerateMetaphones(ArrayList keywords) {
    final ArrayList encodedTerms = new ArrayList(keywords.size());
    final DoubleMetaphone encoder = new DoubleMetaphone();

    for (Object keyword : keywords) {
        // The cast selects the String overload of encode, as each keyword is expected to be a String
        encodedTerms.add(encoder.encode((String) keyword));
    }
    return encodedTerms;
}

From source file:org.activityinfo.server.endpoint.refine.ReconciliationServiceTest.java

/**
 * Prints the Double Metaphone codes of a few sample names: the primary code of "Tin-E"
 * and the alternate code of "Youwarou" on one line, then the primary code of "Youvarou".
 */
@Test
public void doubleMetaphone() {

    final DoubleMetaphone encoder = new DoubleMetaphone();

    final String tinE = encoder.doubleMetaphone("Tin-E");
    final String youwarouAlternate = encoder.doubleMetaphone("Youwarou", true);
    System.out.println(tinE + " " + youwarouAlternate);

    final String youvarou = encoder.doubleMetaphone("Youvarou");
    System.out.println(youvarou);

}

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/**
 * Checks the tokens produced for each phonetic encoder, once with the second argument of
 * {@code assertAlgorithm} set to true (expected output interleaves each code with its
 * original token) and once set to false (expected output holds the codes only).
 */
public void testAlgorithms() throws Exception {
    final String words = "aaa bbb ccc easgasg";
    final String names = "Darda Karleen Datha Carlene";

    // Metaphone
    assertAlgorithm(new Metaphone(), true, words,
            new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
    assertAlgorithm(new Metaphone(), false, words, new String[] { "A", "B", "KKK", "ESKS" });

    // Double Metaphone
    assertAlgorithm(new DoubleMetaphone(), true, words,
            new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
    assertAlgorithm(new DoubleMetaphone(), false, words, new String[] { "A", "PP", "KK", "ASKS" });

    // Soundex
    assertAlgorithm(new Soundex(), true, words,
            new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
    assertAlgorithm(new Soundex(), false, words, new String[] { "A000", "B000", "C000", "E220" });

    // Refined Soundex
    assertAlgorithm(new RefinedSoundex(), true, words,
            new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
    assertAlgorithm(new RefinedSoundex(), false, words, new String[] { "A0", "B1", "C3", "E034034" });

    // Caverphone 2
    assertAlgorithm(new Caverphone2(), true, names, new String[] { "TTA1111111", "Darda", "KLN1111111",
            "Karleen", "TTA1111111", "Datha", "KLN1111111", "Carlene" });
    assertAlgorithm(new Caverphone2(), false, names,
            new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

    // NYSIIS
    assertAlgorithm(new Nysiis(), true, words,
            new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
    assertAlgorithm(new Nysiis(), false, words, new String[] { "A", "B", "C", "EASGAS" });
}

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/** Blasts random strings through a whitespace-tokenizing phonetic analyzer for each encoder. */
public void testRandomStrings() throws IOException {
    final Encoder[] encoders = { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };

    for (final Encoder encoder : encoders) {
        // Each encoder is exercised with two separately built, identically configured analyzers.
        // NOTE(review): both passes use inject=false; the second may have been meant to use
        // inject=true - confirm before changing.
        for (int pass = 0; pass < 2; pass++) {
            final Analyzer analyzer = new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                    final PhoneticFilter phonetic = new PhoneticFilter(source, encoder, false);
                    return new TokenStreamComponents(source, phonetic);
                }
            };

            checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
            analyzer.close();
        }
    }
}

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/**
 * Checks, for each encoder, that an empty term fed through a keyword tokenizer and the
 * phonetic filter comes out as an empty term. The filter's inject flag is chosen at random.
 */
public void testEmptyTerm() throws IOException {
    final Encoder[] encoders = { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };

    for (final Encoder encoder : encoders) {
        final Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                final Tokenizer source = new KeywordTokenizer();
                final boolean inject = random().nextBoolean();
                return new TokenStreamComponents(source, new PhoneticFilter(source, encoder, inject));
            }
        };
        checkOneTerm(analyzer, "", "");
        analyzer.close();
    }
}