List of usage examples for the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()
public DoubleMetaphone()
From source file:org.apache.solr.analysis.TestPhoneticFilter.java
/**
 * Exercises the shared {@code runner} check with a fresh instance of each
 * phonetic encoder: once per encoder with the flag set to {@code true},
 * then once per encoder with the flag set to {@code false}.
 *
 * @throws Exception if any {@code runner} invocation fails
 */
public void testEncodes() throws Exception {
    final boolean[] flagValues = { true, false };
    for (boolean flag : flagValues) {
        // Same encoder order as before: DoubleMetaphone, Metaphone, Soundex, RefinedSoundex.
        runner(new DoubleMetaphone(), flag);
        runner(new Metaphone(), flag);
        runner(new Soundex(), flag);
        runner(new RefinedSoundex(), flag);
    }
}
From source file:org.elasticsearch.index.analysis.phonetic.PhoneticTokenFilterFactory.java
@Inject public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {/*from w w w .java 2 s .c o m*/ super(index, indexSettings, name, settings); this.replace = settings.getAsBoolean("replace", true); String encoder = settings.get("encoder"); if (encoder == null) { throw new ElasticSearchIllegalArgumentException("encoder must be set on phonetic token filter"); } if ("metaphone".equalsIgnoreCase(encoder)) { this.encoder = new Metaphone(); } else if ("soundex".equalsIgnoreCase(encoder)) { this.encoder = new Soundex(); } else if ("caverphone1".equalsIgnoreCase(encoder)) { this.encoder = new Caverphone1(); } else if ("caverphone2".equalsIgnoreCase(encoder)) { this.encoder = new Caverphone2(); } else if ("caverphone".equalsIgnoreCase(encoder)) { this.encoder = new Caverphone2(); } else if ("refined_soundex".equalsIgnoreCase(encoder) || "refinedSoundex".equalsIgnoreCase(encoder)) { this.encoder = new RefinedSoundex(); } else if ("cologne".equalsIgnoreCase(encoder)) { this.encoder = new ColognePhonetic(); } else if ("double_metaphone".equalsIgnoreCase(encoder) || "doubleMetaphone".equalsIgnoreCase(encoder)) { DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); doubleMetaphone.setMaxCodeLen(settings.getAsInt("max_code_len", doubleMetaphone.getMaxCodeLen())); this.encoder = doubleMetaphone; } else { throw new ElasticSearchIllegalArgumentException( "unknown encoder [" + encoder + "] for phonetic token filter"); } }
From source file:org.mitre.opensextant.phonetic.Phoneticizer.java
public Phoneticizer() { // populate the algorithms Map with an instance of each encoder // first the ones from Apache Commons BeiderMorseEncoder bmExact = new BeiderMorseEncoder(); bmExact.setRuleType(RuleType.EXACT); bmExact.setConcat(false);/*from w w w. j a va 2 s. co m*/ BeiderMorseEncoder bmApprox = new BeiderMorseEncoder(); bmApprox.setRuleType(RuleType.APPROX); bmApprox.setConcat(false); // StringEncoder caver = new Caverphone(); StringEncoder caver1 = new Caverphone1(); StringEncoder caver2 = new Caverphone2(); StringEncoder colgne = new ColognePhonetic(); DoubleMetaphone doubleMeta = new DoubleMetaphone(); doubleMeta.setMaxCodeLen(10); StringEncoder meta = new Metaphone(); StringEncoder refinedSound = new RefinedSoundex(); StringEncoder sound = new Soundex(); // now, the home-brewed ones StringEncoder noop = new NullEncoder(); StringEncoder caser = new CaseEncoder(); StringEncoder diaRemover = new DiacriticEncoder(); StringEncoder punctRemover = new PunctEncoder(); StringEncoder simple0 = new SimplePhonetic0Encoder(); StringEncoder simple1 = new SimplePhonetic1Encoder(); StringEncoder simple2 = new SimplePhonetic2Encoder(); // not really language encodings // StringEncoder qcode = new QCodec(); // StringEncoder qpcode = new QuotedPrintableCodec(); // StringEncoder urlcode = new URLCodec(); algorithms.put("Beider-Morse-Exact", bmExact); algorithms.put("Beider-Morse-Approximate", bmApprox); // algorithms.put("CaverPhone", caver); algorithms.put("CaverPhone_1.0", caver1); algorithms.put("CaverPhone_2.0", caver2); algorithms.put("Cologne_Phonetic", colgne); algorithms.put("Double_Metaphone", doubleMeta); algorithms.put("Metaphone", meta); algorithms.put("Refined_Soundex", refinedSound); algorithms.put("Soundex", sound); algorithms.put("Nothing", noop); algorithms.put("Case_Insensitive", caser); algorithms.put("Diacritic_Insensitive", diaRemover); algorithms.put("Puncuation_Insensitive", punctRemover); algorithms.put("Simple_Phonetic0", simple0); 
algorithms.put("Simple_Phonetic1", simple1); algorithms.put("Simple_Phonetic2", simple2); // not really language encodings // algorithms.put("Q Code", qcode); // algorithms.put("Q Printable", qpcode); // algorithms.put("URL Code", urlcode); }
From source file:org.openhie.openempi.transformation.function.DoubleMetaphoneFunction.java
/**
 * Creates the transformation function and gives it a new
 * {@link DoubleMetaphone} encoder instance.
 */
public DoubleMetaphoneFunction() {
    // The no-argument superclass constructor is invoked implicitly.
    this.metaphone = new DoubleMetaphone();
}
From source file:org.openregistry.core.domain.AbstractNameImpl.java
/**
 * Produces the phonetic code for the given string.
 *
 * <p>Despite the method name, the code is computed with
 * {@link DoubleMetaphone}, not with Soundex.
 *
 * @param comparison the text to encode
 * @return the result of {@code DoubleMetaphone.encode(comparison)}
 */
protected final String generateSoundEx(final String comparison) {
    return new DoubleMetaphone().encode(comparison);
}
From source file:org.opensextant.phonetic.Phoneticizer.java
public Phoneticizer() { // populate the algorithms Map with an instance of each encoder // first the ones from Apache Commons BeiderMorseEncoder bmExact = new BeiderMorseEncoder(); bmExact.setRuleType(RuleType.EXACT); bmExact.setConcat(false);/*from w ww. j av a 2 s .c o m*/ BeiderMorseEncoder bmApprox = new BeiderMorseEncoder(); bmApprox.setRuleType(RuleType.APPROX); bmApprox.setConcat(false); // StringEncoder caver = new Caverphone(); StringEncoder caver1 = new Caverphone1(); StringEncoder caver2 = new Caverphone2(); StringEncoder colgne = new ColognePhonetic(); DoubleMetaphone doubleMeta = new DoubleMetaphone(); doubleMeta.setMaxCodeLen(10); StringEncoder meta = new Metaphone(); StringEncoder refinedSound = new RefinedSoundex(); StringEncoder sound = new Soundex(); // now, the home-brewed ones StringEncoder noop = new NullEncoder(); StringEncoder caser = new CaseEncoder(); StringEncoder diaRemover = new DiacriticEncoder(); StringEncoder punctRemover = new PunctEncoder(); StringEncoder simple0 = new SimplePhonetic0Encoder(); StringEncoder simple0Solr = new SimplePhonetic0SolrEncoder(); StringEncoder simple0SolrPlus = new SimplePhonetic0SolrPlusEncoder(); StringEncoder simple1 = new SimplePhonetic1Encoder(); StringEncoder simple2 = new SimplePhonetic2Encoder(); // not really language encodings // StringEncoder qcode = new QCodec(); // StringEncoder qpcode = new QuotedPrintableCodec(); // StringEncoder urlcode = new URLCodec(); algorithms.put("Beider-Morse-Exact", bmExact); algorithms.put("Beider-Morse-Approximate", bmApprox); // algorithms.put("CaverPhone", caver); algorithms.put("CaverPhone_1.0", caver1); algorithms.put("CaverPhone_2.0", caver2); algorithms.put("Cologne_Phonetic", colgne); algorithms.put("Double_Metaphone", doubleMeta); algorithms.put("Metaphone", meta); algorithms.put("Refined_Soundex", refinedSound); algorithms.put("Soundex", sound); algorithms.put("Nothing", noop); algorithms.put("Case_Insensitive", caser); 
algorithms.put("Diacritic_Insensitive", diaRemover); algorithms.put("Puncuation_Insensitive", punctRemover); algorithms.put("Simple_Phonetic0", simple0); algorithms.put("Simple_Phonetic0Solr", simple0Solr); algorithms.put("Simple_Phonetic0SolrPlus", simple0SolrPlus); algorithms.put("Simple_Phonetic1", simple1); algorithms.put("Simple_Phonetic2", simple2); // not really language encodings // algorithms.put("Q Code", qcode); // algorithms.put("Q Printable", qpcode); // algorithms.put("URL Code", urlcode); }
From source file:org.pentaho.di.core.row.ValueDataUtil.java
/**
 * Computes the Double Metaphone code of a value's string form.
 *
 * @param metaA value metadata; not used by this method
 * @param dataA the value to encode, converted with {@code toString()}; may be {@code null}
 * @return {@code null} when {@code dataA} is {@code null}, otherwise the result of
 *         {@code DoubleMetaphone.doubleMetaphone} on the string form
 */
public static String get_Double_Metaphone(ValueMetaInterface metaA, Object dataA) {
    return dataA == null
            ? null
            : new DoubleMetaphone().doubleMetaphone(dataA.toString());
}
From source file:org.pentaho.di.trans.steps.fuzzymatch.FuzzyMatch.java
/**
 * Encodes a value with the phonetic algorithm selected by {@code algorithmType}.
 *
 * @param value the text to encode
 * @param algorithmType one of the {@code FuzzyMatchMeta.OPERATION_TYPE_*} phonetic
 *        constants; it is unboxed by the {@code switch}, so it must not be {@code null}
 * @return the encoded value, or an empty string when {@code algorithmType} is not one
 *         of the four phonetic operation types handled here
 */
private String getEncodedMF(String value, Integer algorithmType) {
    switch (algorithmType) {
    case FuzzyMatchMeta.OPERATION_TYPE_METAPHONE:
        return new Metaphone().metaphone(value);
    case FuzzyMatchMeta.OPERATION_TYPE_DOUBLE_METAPHONE:
        return new DoubleMetaphone().doubleMetaphone(value);
    case FuzzyMatchMeta.OPERATION_TYPE_SOUNDEX:
        return new Soundex().encode(value);
    case FuzzyMatchMeta.OPERATION_TYPE_REFINED_SOUNDEX:
        return new RefinedSoundex().encode(value);
    default:
        // Not a phonetic algorithm: nothing to encode.
        return "";
    }
}
From source file:org.vivoweb.harvester.score.algorithm.NormalizedDoubleMetaphoneDifference.java
@Override public float calculate(CharSequence itemX, CharSequence itemY) { if (itemX.length() == 0 || itemY.length() == 0) { return 0f; }//from ww w.j a va 2s . c o m DoubleMetaphone dm = new DoubleMetaphone(); String dmX = dm.encode(itemX.toString()); String dmY = dm.encode(itemY.toString()); return new NormalizedLevenshteinDifference().calculate(dmX, dmY); }
From source file:org.werelate.util.PlaceUtils.java
/** * Tokenize on spaces and double-metaphone encode the specified string * @param value/* ww w .ja v a2 s.c o m*/ * @return */ public static String doubleMetaphoneEncode(String value) { DoubleMetaphone dm = new DoubleMetaphone(); dm.setMaxCodeLen(8); StringBuilder buf = new StringBuilder(); String[] pieces = value.split("\\s"); for (int i = 0; i < pieces.length; i++) { if (i > 0) { buf.append(' '); } buf.append(dm.doubleMetaphone(pieces[i])); } return buf.toString(); }