Example usage for the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()

List of usage examples for the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()

Introduction

On this page you can find example usages of the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone().

Prototype

public DoubleMetaphone() 

Source Link

Document

Creates an instance of the DoubleMetaphone encoder.

Usage

From source file:org.apache.solr.analysis.TestPhoneticFilter.java

/**
 * Passes a fresh instance of each phonetic encoder (DoubleMetaphone,
 * Metaphone, Soundex, RefinedSoundex) to {@code runner}, first with the
 * boolean argument set to {@code true} and then with it set to {@code false}.
 *
 * @throws Exception if any {@code runner} invocation fails
 */
public void testEncodes() throws Exception {
    // The meaning of the flag is defined by runner(), which is not shown here.
    for (boolean flag : new boolean[] { true, false }) {
        runner(new DoubleMetaphone(), flag);
        runner(new Metaphone(), flag);
        runner(new Soundex(), flag);
        runner(new RefinedSoundex(), flag);
    }
}

From source file:org.elasticsearch.index.analysis.phonetic.PhoneticTokenFilterFactory.java

/**
 * Configures the factory from its settings.
 *
 * <p>Reads the boolean {@code replace} setting (defaulting to {@code true})
 * and the mandatory {@code encoder} setting, then instantiates the matching
 * Commons Codec encoder. Encoder names are compared case-insensitively. For
 * the double metaphone encoder the optional {@code max_code_len} setting
 * overrides the encoder's own default maximum code length.
 *
 * @throws ElasticSearchIllegalArgumentException if {@code encoder} is missing
 *         or names an encoder this factory does not know
 */
@Inject
public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    this.replace = settings.getAsBoolean("replace", true);

    final String encoderName = settings.get("encoder");
    if (encoderName == null) {
        throw new ElasticSearchIllegalArgumentException("encoder must be set on phonetic token filter");
    }

    if ("double_metaphone".equalsIgnoreCase(encoderName) || "doubleMetaphone".equalsIgnoreCase(encoderName)) {
        // Only this encoder is tunable; fall back to its built-in default length.
        final DoubleMetaphone dm = new DoubleMetaphone();
        dm.setMaxCodeLen(settings.getAsInt("max_code_len", dm.getMaxCodeLen()));
        this.encoder = dm;
    } else if ("metaphone".equalsIgnoreCase(encoderName)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encoderName)) {
        this.encoder = new Soundex();
    } else if ("refined_soundex".equalsIgnoreCase(encoderName) || "refinedSoundex".equalsIgnoreCase(encoderName)) {
        this.encoder = new RefinedSoundex();
    } else if ("caverphone1".equalsIgnoreCase(encoderName)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encoderName) || "caverphone".equalsIgnoreCase(encoderName)) {
        // A bare "caverphone" is treated as the 2.0 variant.
        this.encoder = new Caverphone2();
    } else if ("cologne".equalsIgnoreCase(encoderName)) {
        this.encoder = new ColognePhonetic();
    } else {
        throw new ElasticSearchIllegalArgumentException(
                "unknown encoder [" + encoderName + "] for phonetic token filter");
    }
}

From source file:org.mitre.opensextant.phonetic.Phoneticizer.java

/**
 * Fills the {@code algorithms} map with one instance of every supported
 * encoder, keyed by a display name.
 */
public Phoneticizer() {

    // --- Encoders provided by Apache Commons Codec ---

    // Beider-Morse in its exact and approximate flavours, both with concat disabled.
    BeiderMorseEncoder beiderMorseExact = new BeiderMorseEncoder();
    beiderMorseExact.setRuleType(RuleType.EXACT);
    beiderMorseExact.setConcat(false);
    BeiderMorseEncoder beiderMorseApprox = new BeiderMorseEncoder();
    beiderMorseApprox.setRuleType(RuleType.APPROX);
    beiderMorseApprox.setConcat(false);

    StringEncoder caverphone1 = new Caverphone1();
    StringEncoder caverphone2 = new Caverphone2();
    StringEncoder cologne = new ColognePhonetic();

    // Double Metaphone is configured with a maximum code length of 10.
    DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
    doubleMetaphone.setMaxCodeLen(10);

    StringEncoder metaphone = new Metaphone();
    StringEncoder refinedSoundex = new RefinedSoundex();
    StringEncoder soundex = new Soundex();

    // --- Home-brewed encoders ---
    StringEncoder identity = new NullEncoder();
    StringEncoder caseFolder = new CaseEncoder();
    StringEncoder diacriticStripper = new DiacriticEncoder();
    StringEncoder punctuationStripper = new PunctEncoder();
    StringEncoder simplePhonetic0 = new SimplePhonetic0Encoder();
    StringEncoder simplePhonetic1 = new SimplePhonetic1Encoder();
    StringEncoder simplePhonetic2 = new SimplePhonetic2Encoder();

    // The generic Caverphone encoder and the QCodec / QuotedPrintableCodec /
    // URLCodec codecs ("not really language encodings") are not registered.

    // --- Registration: Commons Codec encoders ---
    algorithms.put("Beider-Morse-Exact", beiderMorseExact);
    algorithms.put("Beider-Morse-Approximate", beiderMorseApprox);
    algorithms.put("CaverPhone_1.0", caverphone1);
    algorithms.put("CaverPhone_2.0", caverphone2);
    algorithms.put("Cologne_Phonetic", cologne);
    algorithms.put("Double_Metaphone", doubleMetaphone);
    algorithms.put("Metaphone", metaphone);
    algorithms.put("Refined_Soundex", refinedSoundex);
    algorithms.put("Soundex", soundex);

    // --- Registration: home-brewed encoders ---
    algorithms.put("Nothing", identity);
    algorithms.put("Case_Insensitive", caseFolder);
    algorithms.put("Diacritic_Insensitive", diacriticStripper);
    // The key's spelling is kept exactly as-is because it is a lookup key.
    algorithms.put("Puncuation_Insensitive", punctuationStripper);
    algorithms.put("Simple_Phonetic0", simplePhonetic0);
    algorithms.put("Simple_Phonetic1", simplePhonetic1);
    algorithms.put("Simple_Phonetic2", simplePhonetic2);
}

From source file:org.openhie.openempi.transformation.function.DoubleMetaphoneFunction.java

/**
 * Creates the function and stores a new {@link DoubleMetaphone} encoder in
 * the {@code metaphone} field.
 */
public DoubleMetaphoneFunction() {
    // The superclass no-arg constructor is invoked implicitly.
    this.metaphone = new DoubleMetaphone();
}

From source file:org.openregistry.core.domain.AbstractNameImpl.java

/**
 * Encodes the given string with a freshly created {@link DoubleMetaphone}
 * encoder. Despite the method name, the algorithm used is Double Metaphone,
 * not Soundex.
 *
 * @param comparison the text to encode
 * @return the result of {@code DoubleMetaphone.encode(comparison)}
 */
protected final String generateSoundEx(final String comparison) {
    return new DoubleMetaphone().encode(comparison);
}

From source file:org.opensextant.phonetic.Phoneticizer.java

/**
 * Fills the {@code algorithms} map with one instance of every supported
 * encoder, keyed by a display name.
 */
public Phoneticizer() {
    // --- Encoders provided by Apache Commons Codec ---
    // Beider-Morse in its exact and approximate flavours, both with concat disabled.
    BeiderMorseEncoder beiderMorseExact = new BeiderMorseEncoder();
    beiderMorseExact.setRuleType(RuleType.EXACT);
    beiderMorseExact.setConcat(false);
    BeiderMorseEncoder beiderMorseApprox = new BeiderMorseEncoder();
    beiderMorseApprox.setRuleType(RuleType.APPROX);
    beiderMorseApprox.setConcat(false);
    StringEncoder caverphone1 = new Caverphone1();
    StringEncoder caverphone2 = new Caverphone2();
    StringEncoder cologne = new ColognePhonetic();
    // Double Metaphone is configured with a maximum code length of 10.
    DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
    doubleMetaphone.setMaxCodeLen(10);
    StringEncoder metaphone = new Metaphone();
    StringEncoder refinedSoundex = new RefinedSoundex();
    StringEncoder soundex = new Soundex();

    // --- Home-brewed encoders ---
    StringEncoder identity = new NullEncoder();
    StringEncoder caseFolder = new CaseEncoder();
    StringEncoder diacriticStripper = new DiacriticEncoder();
    StringEncoder punctuationStripper = new PunctEncoder();
    StringEncoder simplePhonetic0 = new SimplePhonetic0Encoder();
    StringEncoder simplePhonetic0Solr = new SimplePhonetic0SolrEncoder();
    StringEncoder simplePhonetic0SolrPlus = new SimplePhonetic0SolrPlusEncoder();
    StringEncoder simplePhonetic1 = new SimplePhonetic1Encoder();
    StringEncoder simplePhonetic2 = new SimplePhonetic2Encoder();

    // The generic Caverphone encoder and the QCodec / QuotedPrintableCodec /
    // URLCodec codecs ("not really language encodings") are not registered.

    // --- Registration: Commons Codec encoders ---
    algorithms.put("Beider-Morse-Exact", beiderMorseExact);
    algorithms.put("Beider-Morse-Approximate", beiderMorseApprox);
    algorithms.put("CaverPhone_1.0", caverphone1);
    algorithms.put("CaverPhone_2.0", caverphone2);
    algorithms.put("Cologne_Phonetic", cologne);
    algorithms.put("Double_Metaphone", doubleMetaphone);
    algorithms.put("Metaphone", metaphone);
    algorithms.put("Refined_Soundex", refinedSoundex);
    algorithms.put("Soundex", soundex);

    // --- Registration: home-brewed encoders ---
    algorithms.put("Nothing", identity);
    algorithms.put("Case_Insensitive", caseFolder);
    algorithms.put("Diacritic_Insensitive", diacriticStripper);
    // The key's spelling is kept exactly as-is because it is a lookup key.
    algorithms.put("Puncuation_Insensitive", punctuationStripper);
    algorithms.put("Simple_Phonetic0", simplePhonetic0);
    algorithms.put("Simple_Phonetic0Solr", simplePhonetic0Solr);
    algorithms.put("Simple_Phonetic0SolrPlus", simplePhonetic0SolrPlus);
    algorithms.put("Simple_Phonetic1", simplePhonetic1);
    algorithms.put("Simple_Phonetic2", simplePhonetic2);
}

From source file:org.pentaho.di.core.row.ValueDataUtil.java

/**
 * Computes the Double Metaphone code of a value's string form.
 *
 * @param metaA value metadata; not consulted by this method
 * @param dataA the value to encode; its {@code toString()} is what gets encoded
 * @return {@code null} when {@code dataA} is {@code null}, otherwise the result
 *         of {@code DoubleMetaphone.doubleMetaphone(dataA.toString())}
 */
public static String get_Double_Metaphone(ValueMetaInterface metaA, Object dataA) {
    return dataA == null ? null : new DoubleMetaphone().doubleMetaphone(dataA.toString());
}

From source file:org.pentaho.di.trans.steps.fuzzymatch.FuzzyMatch.java

/**
 * Encodes {@code value} with the phonetic algorithm selected by
 * {@code algorithmType}.
 *
 * @param value         the text to encode
 * @param algorithmType one of the {@code FuzzyMatchMeta.OPERATION_TYPE_*}
 *                      constants handled below
 * @return the encoder's output, or the empty string when
 *         {@code algorithmType} matches none of the handled constants
 */
private String getEncodedMF(String value, Integer algorithmType) {
    // Each branch uses a throwaway encoder instance and returns its result directly.
    switch (algorithmType) {
    case FuzzyMatchMeta.OPERATION_TYPE_METAPHONE:
        return new Metaphone().metaphone(value);
    case FuzzyMatchMeta.OPERATION_TYPE_DOUBLE_METAPHONE:
        return new DoubleMetaphone().doubleMetaphone(value);
    case FuzzyMatchMeta.OPERATION_TYPE_SOUNDEX:
        return new Soundex().encode(value);
    case FuzzyMatchMeta.OPERATION_TYPE_REFINED_SOUNDEX:
        return new RefinedSoundex().encode(value);
    default:
        return "";
    }
}

From source file:org.vivoweb.harvester.score.algorithm.NormalizedDoubleMetaphoneDifference.java

/**
 * Scores two items by Double Metaphone encoding each of them and passing the
 * two codes to {@code NormalizedLevenshteinDifference}.
 *
 * @param itemX first item to compare
 * @param itemY second item to compare
 * @return {@code 0f} when either item is empty or has no Double Metaphone
 *         code; otherwise the value returned by
 *         {@code NormalizedLevenshteinDifference.calculate} for the two codes
 */
@Override
public float calculate(CharSequence itemX, CharSequence itemY) {
    if (itemX.length() == 0 || itemY.length() == 0) {
        return 0f;
    }
    DoubleMetaphone dm = new DoubleMetaphone();
    String dmX = dm.encode(itemX.toString());
    String dmY = dm.encode(itemY.toString());
    // DoubleMetaphone trims its input and returns null when nothing is left
    // (e.g. a whitespace-only item), which slips past the length check above.
    // Treat that like the empty case instead of passing null downstream.
    if (dmX == null || dmY == null) {
        return 0f;
    }
    return new NormalizedLevenshteinDifference().calculate(dmX, dmY);
}

From source file:org.werelate.util.PlaceUtils.java

/**
 * Tokenize on spaces and double-metaphone encode the specified string
 * @param value/*  ww w .ja  v a2  s.c o m*/
 * @return
 */
public static String doubleMetaphoneEncode(String value) {
    DoubleMetaphone dm = new DoubleMetaphone();
    dm.setMaxCodeLen(8);
    StringBuilder buf = new StringBuilder();
    String[] pieces = value.split("\\s");
    for (int i = 0; i < pieces.length; i++) {
        if (i > 0) {
            buf.append(' ');
        }
        buf.append(dm.doubleMetaphone(pieces[i]));
    }
    return buf.toString();
}