Java tutorial
// ============================================================================ // // Copyright (C) 2006-2015 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.standardization.index.test; import java.io.File; import java.io.IOException; import junit.framework.TestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.util.Version; /** * DOC scorreia class global comment. Detailled comment */ public class SynonymTest extends TestCase { /** * */ private static final String FIELD_NAME = "name"; private static final String directoryPath = "data/test"; public void testRun() { MMapDirectory index; try { index = new MMapDirectory(new File(directoryPath)); // The same analyzer should be used for indexing and searching Analyzer analyzer = createAnalyzer(); // Analyzer analyzer = new StandardAnalyzer(); // the boolean arg in the IndexWriter ctor means to // create a new index, overwriting any existing index IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); // read the data (this will be the input data of a component called // tFirstNameStandardize) String name = "Stephane"; String gender = "M"; Document doc = addDoc(w, name, gender); addSynonym(FIELD_NAME, "Steph", doc); w.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void testSearch() { // TODO search for steph Directory dir = null; IndexSearcher is = null; try { dir = FSDirectory.open(new File(directoryPath)); is = new IndexSearcher(dir); Analyzer analyzer = createAnalyzer(); // Term termName = new Term("steph"); QueryParser qp = new QueryParser(luceneVersion, FIELD_NAME, analyzer); Query q = qp.parse("Stephane"); TopDocsCollector<?> collector = TopScoreDocCollector.create(2, false); is.search(q, collector); ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs; System.out.println("nb doc= " + scoreDocs.length); for (ScoreDoc scoreDoc : scoreDocs) { System.out.println(scoreDoc); } } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } private static final Version luceneVersion = Version.LUCENE_30; static Analyzer createAnalyzer() { return new StandardAnalyzer(luceneVersion); } /** * DOC scorreia Comment method "addSynonym". * * @param fieldName * @param synonym * @param doc */ private void addSynonym(String fieldName, String synonym, Document doc) { assert doc != null; doc.add(new Field(fieldName, synonym, Field.Store.YES, Field.Index.ANALYZED)); } private static Document addDoc(IndexWriter w, String name, String gender) throws IOException { Document doc = new Document(); Field field = new Field(FIELD_NAME, name, Field.Store.YES, Field.Index.ANALYZED, TermVector.YES); doc.add(field); doc.add(new Field("gender", gender, Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.YES)); w.addDocument(doc); return doc; } }