Example usage for org.apache.commons.codec.language DoubleMetaphone setMaxCodeLen

List of usage examples for org.apache.commons.codec.language DoubleMetaphone setMaxCodeLen

Introduction

This page collects usage examples for the setMaxCodeLen method of org.apache.commons.codec.language.DoubleMetaphone.

Prototype

public void setMaxCodeLen(int maxCodeLen) 

Source Link

Document

Sets the maxCodeLen, i.e. the maximum length of the codes this encoder produces.

Usage

From source file:org.elasticsearch.index.analysis.phonetic.PhoneticTokenFilterFactory.java

/**
 * Builds the factory and selects the phonetic encoder named by the {@code encoder} setting.
 *
 * <p>Settings read from {@code settings}:
 * <ul>
 *   <li>{@code replace} - boolean, defaults to {@code true};</li>
 *   <li>{@code encoder} - required encoder name, matched case-insensitively;</li>
 *   <li>{@code max_code_len} - only read for the double metaphone encoder; defaults to the
 *       value a freshly created {@code DoubleMetaphone} reports.</li>
 * </ul>
 *
 * @throws ElasticSearchIllegalArgumentException if {@code encoder} is missing or names an
 *         encoder this factory does not know
 */
@Inject
public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    this.replace = settings.getAsBoolean("replace", true);

    final String encoderName = settings.get("encoder");
    if (encoderName == null) {
        throw new ElasticSearchIllegalArgumentException("encoder must be set on phonetic token filter");
    }

    if ("metaphone".equalsIgnoreCase(encoderName)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encoderName)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encoderName)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encoderName) || "caverphone".equalsIgnoreCase(encoderName)) {
        // the unversioned name is an alias for version 2
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encoderName)
            || "refinedSoundex".equalsIgnoreCase(encoderName)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encoderName)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encoderName)
            || "doubleMetaphone".equalsIgnoreCase(encoderName)) {
        final DoubleMetaphone configured = new DoubleMetaphone();
        // keep the encoder's own limit unless the settings override it
        final int maxCodeLen = settings.getAsInt("max_code_len", configured.getMaxCodeLen());
        configured.setMaxCodeLen(maxCodeLen);
        this.encoder = configured;
    } else {
        throw new ElasticSearchIllegalArgumentException(
                "unknown encoder [" + encoderName + "] for phonetic token filter");
    }
}

From source file:org.mitre.opensextant.phonetic.Phoneticizer.java

/**
 * Creates the phoneticizer and registers one encoder instance per algorithm name in the
 * {@code algorithms} map.
 */
public Phoneticizer() {

    // --- encoders shipped with Apache Commons Codec ---

    // Beider-Morse in both rule flavours, each with concatenation switched off
    BeiderMorseEncoder exactBeiderMorse = new BeiderMorseEncoder();
    exactBeiderMorse.setRuleType(RuleType.EXACT);
    exactBeiderMorse.setConcat(false);
    BeiderMorseEncoder approxBeiderMorse = new BeiderMorseEncoder();
    approxBeiderMorse.setRuleType(RuleType.APPROX);
    approxBeiderMorse.setConcat(false);

    // the unversioned Caverphone encoder is deliberately left out
    StringEncoder caverphoneV1 = new Caverphone1();
    StringEncoder caverphoneV2 = new Caverphone2();
    StringEncoder cologne = new ColognePhonetic();

    // double metaphone is the only encoder configured with an explicit max code length
    DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
    doubleMetaphone.setMaxCodeLen(10);

    StringEncoder metaphone = new Metaphone();
    StringEncoder refinedSoundex = new RefinedSoundex();
    StringEncoder soundex = new Soundex();

    // --- project-local encoders ---
    StringEncoder passThrough = new NullEncoder();
    StringEncoder caseFolder = new CaseEncoder();
    StringEncoder diacriticStripper = new DiacriticEncoder();
    StringEncoder punctuationStripper = new PunctEncoder();
    StringEncoder simplePhonetic0 = new SimplePhonetic0Encoder();
    StringEncoder simplePhonetic1 = new SimplePhonetic1Encoder();
    StringEncoder simplePhonetic2 = new SimplePhonetic2Encoder();

    // QCodec, QuotedPrintableCodec and URLCodec are not registered:
    // they are not really language encodings.

    // --- registration: Commons Codec encoders ---
    algorithms.put("Beider-Morse-Exact", exactBeiderMorse);
    algorithms.put("Beider-Morse-Approximate", approxBeiderMorse);
    algorithms.put("CaverPhone_1.0", caverphoneV1);
    algorithms.put("CaverPhone_2.0", caverphoneV2);
    algorithms.put("Cologne_Phonetic", cologne);
    algorithms.put("Double_Metaphone", doubleMetaphone);
    algorithms.put("Metaphone", metaphone);
    algorithms.put("Refined_Soundex", refinedSoundex);
    algorithms.put("Soundex", soundex);

    // --- registration: project-local encoders ---
    algorithms.put("Nothing", passThrough);
    algorithms.put("Case_Insensitive", caseFolder);
    algorithms.put("Diacritic_Insensitive", diacriticStripper);
    algorithms.put("Puncuation_Insensitive", punctuationStripper);
    algorithms.put("Simple_Phonetic0", simplePhonetic0);
    algorithms.put("Simple_Phonetic1", simplePhonetic1);
    algorithms.put("Simple_Phonetic2", simplePhonetic2);

}

From source file:org.opensextant.phonetic.Phoneticizer.java

/**
 * Creates the phoneticizer and registers one encoder instance per algorithm name in the
 * {@code algorithms} map.
 */
public Phoneticizer() {
    // --- encoders shipped with Apache Commons Codec ---
    // Beider-Morse in both rule flavours, each with concatenation switched off
    BeiderMorseEncoder exactBeiderMorse = new BeiderMorseEncoder();
    exactBeiderMorse.setRuleType(RuleType.EXACT);
    exactBeiderMorse.setConcat(false);
    BeiderMorseEncoder approxBeiderMorse = new BeiderMorseEncoder();
    approxBeiderMorse.setRuleType(RuleType.APPROX);
    approxBeiderMorse.setConcat(false);
    // the unversioned Caverphone encoder is deliberately left out
    StringEncoder caverphoneV1 = new Caverphone1();
    StringEncoder caverphoneV2 = new Caverphone2();
    StringEncoder cologne = new ColognePhonetic();
    // double metaphone is the only encoder configured with an explicit max code length
    DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
    doubleMetaphone.setMaxCodeLen(10);
    StringEncoder metaphone = new Metaphone();
    StringEncoder refinedSoundex = new RefinedSoundex();
    StringEncoder soundex = new Soundex();
    // --- project-local encoders ---
    StringEncoder passThrough = new NullEncoder();
    StringEncoder caseFolder = new CaseEncoder();
    StringEncoder diacriticStripper = new DiacriticEncoder();
    StringEncoder punctuationStripper = new PunctEncoder();
    StringEncoder simplePhonetic0 = new SimplePhonetic0Encoder();
    StringEncoder simplePhonetic0Solr = new SimplePhonetic0SolrEncoder();
    StringEncoder simplePhonetic0SolrPlus = new SimplePhonetic0SolrPlusEncoder();
    StringEncoder simplePhonetic1 = new SimplePhonetic1Encoder();
    StringEncoder simplePhonetic2 = new SimplePhonetic2Encoder();
    // QCodec, QuotedPrintableCodec and URLCodec are not registered:
    // they are not really language encodings.
    // --- registration: Commons Codec encoders ---
    algorithms.put("Beider-Morse-Exact", exactBeiderMorse);
    algorithms.put("Beider-Morse-Approximate", approxBeiderMorse);
    algorithms.put("CaverPhone_1.0", caverphoneV1);
    algorithms.put("CaverPhone_2.0", caverphoneV2);
    algorithms.put("Cologne_Phonetic", cologne);
    algorithms.put("Double_Metaphone", doubleMetaphone);
    algorithms.put("Metaphone", metaphone);
    algorithms.put("Refined_Soundex", refinedSoundex);
    algorithms.put("Soundex", soundex);
    // --- registration: project-local encoders ---
    algorithms.put("Nothing", passThrough);
    algorithms.put("Case_Insensitive", caseFolder);
    algorithms.put("Diacritic_Insensitive", diacriticStripper);
    algorithms.put("Puncuation_Insensitive", punctuationStripper);
    algorithms.put("Simple_Phonetic0", simplePhonetic0);
    algorithms.put("Simple_Phonetic0Solr", simplePhonetic0Solr);
    algorithms.put("Simple_Phonetic0SolrPlus", simplePhonetic0SolrPlus);
    algorithms.put("Simple_Phonetic1", simplePhonetic1);
    algorithms.put("Simple_Phonetic2", simplePhonetic2);
}

From source file:org.werelate.util.PlaceUtils.java

/**
 * Tokenize on spaces and double-metaphone encode the specified string
 * @param value/* w w w .j  av  a 2 s . co  m*/
 * @return
 */
public static String doubleMetaphoneEncode(String value) {
    DoubleMetaphone dm = new DoubleMetaphone();
    dm.setMaxCodeLen(8);
    StringBuilder buf = new StringBuilder();
    String[] pieces = value.split("\\s");
    for (int i = 0; i < pieces.length; i++) {
        if (i > 0) {
            buf.append(' ');
        }
        buf.append(dm.doubleMetaphone(pieces[i]));
    }
    return buf.toString();
}