Example usage of the org.apache.lucene.search.spell.LuceneDictionary constructor

List of usage examples for the org.apache.lucene.search.spell.LuceneDictionary constructor

Introduction

On this page you can find example usages of the org.apache.lucene.search.spell.LuceneDictionary constructor.

Prototype

public LuceneDictionary(IndexReader reader, String field) 

Source Link

Document

Creates a new Dictionary, pulling source terms from the specified field in the provided reader.

Usage

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper.java

License:Apache License

/**
 * Builds the suggester lookup from the terms of the {@code FieldNames.SUGGEST}
 * field, provided at least one document in {@code reader} carries that field.
 * <p>
 * Runtime failures during the build are logged at debug level and not
 * propagated. The scratch directory created for the build is always removed
 * afterwards.
 *
 * @param directory directory handed to {@code getLookup} for the suggester data
 * @param analyzer analyzer handed to {@code getLookup}
 * @param reader index reader supplying the suggest terms
 * @throws IOException if reading the index or building the lookup fails
 */
public static void updateSuggester(Directory directory, Analyzer analyzer, IndexReader reader)
        throws IOException {
    File scratchDir = null;
    try {
        // The analyzing infix suggester wants a file whose path it resolves via
        // getDirectory() for its storage, and during a build it additionally
        // resolves a temporary sibling location (path + ".tmp"). We therefore hand
        // it a never-created child of a throw-away temp dir: that child marks where
        // our internal suggestion OakDirectory is returned, and deleting the temp
        // dir afterwards wipes whatever the suggester left behind while building.
        scratchDir = Files.createTempDir();
        final File placeholder = new File(scratchDir, "non-existing-sub-child");

        final boolean hasSuggestTerms = reader.getDocCount(FieldNames.SUGGEST) > 0;
        if (hasSuggestTerms) {
            final Dictionary suggestTerms = new LuceneDictionary(reader, FieldNames.SUGGEST);
            getLookup(directory, analyzer, placeholder).build(suggestTerms);
        }
    } catch (RuntimeException e) {
        log.debug("could not update the suggester", e);
    } finally {
        // Remove the scratch directory; complain only if it existed and could not be deleted.
        if (scratchDir != null) {
            final boolean removed = FileUtils.deleteQuietly(scratchDir);
            if (!removed) {
                log.error("Cleanup failed for temp dir {}", scratchDir.getAbsolutePath());
            }
        }
    }
}

From source file:org.capelin.transaction.dao.RecordDao.java

License:GNU General Public License

/**
 * Spell-checks {@code term} against the terms of {@code field} in the record
 * index and returns similar terms. If the field is not tokenized, the
 * suggestions are full field values, so this behaves like a browse function.
 * <p>
 * The spell index is (re)fed from the field's terms on every call, using the
 * directory of the first directory provider registered for the record class.
 *
 * @param field name of the index field whose terms feed the dictionary
 * @param term the word to find similar terms for
 * @return the suggestions (the requested count is {@code getPageSize()}), or
 *         {@code null} if an {@link IOException} occurred
 */
public String[] spellcheck(String field, String term) {
    SearchFactory searchFactory = Search.getFullTextSession(getSession()).getSearchFactory();
    DirectoryProvider<?> recordProvider = searchFactory.getDirectoryProviders(recordClass)[0];
    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    IndexReader reader = readerProvider.openReader(recordProvider);
    String[] similars = null;
    try {
        SpellChecker spellchecker = new SpellChecker(recordProvider.getDirectory());
        spellchecker.indexDictionary(new LuceneDictionary(reader, field));
        // Very low accuracy threshold so that nearly every candidate qualifies.
        spellchecker.setAccuracy(0.0001f);
        similars = spellchecker.suggestSimilar(term, getPageSize(), reader, field, true);
    } catch (IOException e) {
        // Pass the exception as the throwable so the stack trace is not lost.
        log.error("Index not found: " + e, e);
    } finally {
        // The reader came from the provider, so it must be handed back to it.
        readerProvider.closeReader(reader);
    }
    return similars;
}

From source file:org.codesearch.searcher.server.util.STAlternativeSuggestor.java

License:Open Source License

/**
 * Creates the spell index for the SpellChecker
 * @param field/*w  w  w . ja va2 s.c  o  m*/
 * @param originalIndexDirectory
 * @param spellIndexDirectory
 * @throws IOException
 */
public void createSpellIndex(String field, Directory originalIndexDirectory, Directory spellIndexDirectory)
        throws IOException {
    IndexReader indexReader = null;
    try {
        indexReader = IndexReader.open(originalIndexDirectory);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.indexDictionary(dictionary);
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }
}

From source file:org.codesearch.searcher.server.util.STAutocompleter.java

License:Open Source License

/**
 * Feeds the autocomplete index with the terms of one field of a source index.
 * <p>
 * Every non-empty term of {@code fieldToAutocomplete} becomes one document
 * holding the term verbatim in {@code SOURCE_WORD_FIELD} and analyzed in
 * {@code GRAMMED_WORDS_FIELD}. Once the index is written, the autocomplete
 * reader is refreshed via {@code setupReader()}.
 *
 * @param sourceDirectory directory of the index the terms are read from
 * @param fieldToAutocomplete name of the field whose terms are indexed
 * @throws CorruptIndexException if one of the indexes is corrupt
 * @throws IOException if reading the source index or writing the autocomplete index fails
 */
public void setupIndex(Directory sourceDirectory, String fieldToAutocomplete)
        throws CorruptIndexException, IOException {
    IndexReader sourceReader = IndexReader.open(sourceDirectory);
    try {
        IndexWriter writer = new IndexWriter(autoCompleteDirectory, new STAutocompleteLuceneAnalyzer(),
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            writer.setMergeFactor(300);
            writer.setMaxBufferedDocs(150);

            LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);
            // Maps each term to its document frequency; only the keys are used below.
            Map<String, Integer> wordsMap = new HashMap<String, Integer>();
            Iterator<String> iter = dict.getWordsIterator();
            while (iter.hasNext()) {
                String word = iter.next();
                // The former guard (length() < 0) could never be true; skip empty terms instead.
                if (word.isEmpty()) {
                    continue;
                }
                wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word)));
            }

            LOG.info("SetupIndex: " + GRAMMED_WORDS_FIELD);
            for (String word : wordsMap.keySet()) {
                Document doc = new Document();
                doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED));
                LOG.info("source:" + word);
                doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED));
                LOG.info("grammed:" + word);
                writer.addDocument(doc);
            }
            writer.optimize();
        } finally {
            // Always close the writer so the index write lock is released, even on failure.
            writer.close();
        }
    } finally {
        sourceReader.close();
    }
    // Only refresh the reader once the index has been written successfully.
    setupReader();
}

From source file:org.olat.search.service.spell.SearchSpellChecker.java

License:Apache License

/**
 * Creates a new spell-check index based on the search index.
 * <p>
 * Does nothing unless spell checking is enabled. Otherwise one partial spell
 * index is built for each of the content, title, description and author
 * fields, the partial indexes are merged into one common spell index, and
 * {@code spellChecker} is replaced by a checker on that merged index.
 * I/O failures are logged as warnings and not propagated.
 */
public void createSpellIndex() {
    if (!isSpellCheckEnabled) {
        return;
    }
    IndexReader indexReader = null;
    try {
        log.info("Start generating Spell-Index...");
        long startSpellIndexTime = 0;
        if (log.isDebug()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        final Directory indexDir = FSDirectory.open(new File(indexPath));
        indexReader = IndexReader.open(indexDir);

        // Build the partial spell indexes: 1. content, 2. title, 3. description, 4. author.
        final Directory[] directories = {
                createPartSpellIndex(indexReader, CONTENT_PATH, AbstractOlatDocument.CONTENT_FIELD_NAME),
                createPartSpellIndex(indexReader, TITLE_PATH, AbstractOlatDocument.TITLE_FIELD_NAME),
                createPartSpellIndex(indexReader, DESCRIPTION_PATH, AbstractOlatDocument.DESCRIPTION_FIELD_NAME),
                createPartSpellIndex(indexReader, AUTHOR_PATH, AbstractOlatDocument.AUTHOR_FIELD_NAME) };

        // Merge all part spell indexes (content, title etc.) into one common spell index.
        final Directory spellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath));
        final IndexWriter merger = new IndexWriter(spellIndexDirectory,
                new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
        } finally {
            // Close even when merging fails, otherwise the index write lock stays held.
            merger.close();
        }
        spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.setAccuracy(0.7f);
        if (log.isDebug()) {
            log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
        }
        log.info("New generated Spell-Index ready to use.");
    } catch (final IOException ioEx) {
        log.warn("Can not create SpellIndex", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}

/**
 * Builds the partial spell index for one field of the search index.
 *
 * @param indexReader reader on the search index supplying the terms
 * @param partPath suffix appended to {@code spellDictionaryPath} to locate the partial index
 * @param fieldName name of the search-index field whose terms are indexed
 * @return the directory holding the partial spell index
 * @throws IOException if the partial index cannot be opened or written
 */
private Directory createPartSpellIndex(final IndexReader indexReader, final String partPath,
        final String fieldName) throws IOException {
    final Directory partDirectory = FSDirectory.open(new File(spellDictionaryPath + partPath));
    final SpellChecker partSpellChecker = new SpellChecker(partDirectory);
    partSpellChecker.indexDictionary(new LuceneDictionary(indexReader, fieldName));
    return partDirectory;
}

From source file:org.opengrok.suggest.SuggesterProjectData.java

License:Open Source License

/**
 * Builds the WFST completion lookup for one field and records the average
 * term length of that field in {@code averageLengths}.
 *
 * @param indexReader reader supplying the terms of the field
 * @param field name of the field to build the lookup for
 * @return the built lookup
 * @throws IOException if reading the terms or building the lookup fails
 */
private WFSTCompletionLookup build(final IndexReader indexReader, final String field) throws IOException {
    final LuceneDictionary fieldDictionary = new LuceneDictionary(indexReader, field);
    final WFSTInputIterator termIterator = new WFSTInputIterator(
            fieldDictionary.getEntryIterator(), indexReader, field, getSearchCounts(field));

    final WFSTCompletionLookup wfst = createWFST();
    wfst.build(termIterator);

    // Accumulated term length divided by the number of entries in the lookup.
    final double meanTermLength = (double) termIterator.termLengthAccumulator / wfst.getCount();
    averageLengths.put(field, meanTermLength);

    return wfst;
}

From source file:org.sakaiproject.search.journal.impl.JournaledFSIndexStorage.java

License:Educational Community License

/**
 * Builds the "did you mean" spell index from the contents field of the search index.
 * <p>
 * Does nothing unless the {@code search.experimental.didyoumean} setting is
 * enabled. The spell index lives in the {@code spellindex} directory below the
 * local index base; it is cleared and then rebuilt from the terms of
 * {@code SearchService.FIELD_CONTENTS}. I/O failures are logged and not propagated.
 *
 * @param indexReader reader on the search index; if {@code null}, no spell index is built
 */
private void createSpellIndex(IndexReader indexReader) {
    if (!serverConfigurationService.getBoolean("search.experimental.didyoumean", false)) {
        return;
    }

    log.info("create Spell Index");

    long start = System.currentTimeMillis();
    try {

        log.info("main index is in: " + journalSettings.getSearchIndexDirectory());
        log.info("local base is: " + journalSettings.getLocalIndexBase());
        // The directory field is assigned before the reader check, as in the original flow.
        spellIndexDirectory = new NIOFSDirectory(new File(journalSettings.getLocalIndexBase() + "/spellindex"));
        if (indexReader == null) {
            log.info("unable to get index reader aborting spellindex creation");
            return;
        }
        Dictionary dictionary = new LuceneDictionary(indexReader, SearchService.FIELD_CONTENTS);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.clearIndex();
        spellChecker.indexDictionary(dictionary);
        log.info("New Spell dictionary constructed in " + (System.currentTimeMillis() - start));
    } catch (IOException e) {
        // Report through the logger (with stack trace) instead of printing to stderr.
        log.error("Failed to create spell index", e);
    }

    log.info("All done in " + (System.currentTimeMillis() - start));

}

From source file:org.silverpeas.search.indexEngine.model.DidYouMeanIndexer.java

License:Open Source License

/**
 * Creates or updates a spelling index. The spelling index is created or updated from an existing
 * index. The spelling index is used to suggest words when a user executes a query that returns
 * unsatisfactory results. If a spelling index already exists, only the new words contained in the
 * index source will be added; otherwise a new index will be created.
 * <p>
 * Invalid arguments and indexing failures are reported through {@code SilverTrace} and are not
 * propagated to the caller.
 * @param field name of the field of the index source that will be used to feed the spelling index
 * @param originalIndexDirectory represents the source index path
 * @param spellIndexDirectory represents the spelling index path
 */
public static void createSpellIndex(String field, String originalIndexDirectory, String spellIndexDirectory) {
    // stop the process if any method parameter is null or empty
    if (!StringUtil.isDefined(field) || !StringUtil.isDefined(originalIndexDirectory)
            || !StringUtil.isDefined(spellIndexDirectory)) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INVALID_ARG");
        return;
    }
    // resources released in the finally block
    IndexReader indexReader = null;
    FSDirectory directory = null;

    try {
        // open the directory of the spelling index
        File file = new File(spellIndexDirectory);
        directory = FSDirectory.open(file);
        // open the original index
        indexReader = IndexReader.open(FSDirectory.open(new File(originalIndexDirectory)));
        // create a Lucene dictionary with the original index
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        // index the dictionary into the spelling index
        SpellChecker spellChecker = new SpellChecker(directory);
        try {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                    new StandardAnalyzer(Version.LUCENE_36));
            spellChecker.indexDictionary(dictionary, config, true);
        } finally {
            // close the spell checker even when indexing fails
            spellChecker.close();
        }
    } catch (CorruptIndexException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INDEX_FAILED", e);
    } catch (IOException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_LOAD_IO_EXCEPTION", e);
    } finally {
        IOUtils.closeQuietly(indexReader);
        IOUtils.closeQuietly(directory);
    }

}

From source file:org.watermint.sourcecolon.org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Generates the spelling suggestion index for the definitions stored in the
 * {@code defs} field of the index.
 * <p>
 * I/O failures are logged and not propagated. The index reader, the spell
 * checker and the spell directory are closed before the method returns.
 */
public void createSpellingSuggestions() {
    IndexReader indexReader = null;
    SpellChecker checker = null;

    try {
        log.info("Generating spelling suggestion index ... ");
        indexReader = IndexReader.open(indexDirectory);
        checker = new SpellChecker(spellDirectory);
        //TODO below seems only to index "defs" , possible bug ?
        checker.indexDictionary(new LuceneDictionary(indexReader, "defs"),
                new IndexWriterConfig(Version.LUCENE_36, null), true);
        log.info("done");
    } catch (IOException e) {
        // The Throwable overload of log() performs no {0} substitution, so the
        // message carries no placeholder; the exception is logged with its stack trace.
        log.log(Level.SEVERE, "ERROR: Generating spelling", e);
    } finally {
        // Close the checker first: it works on top of the spell directory closed below.
        if (checker != null) {
            try {
                checker.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing spell checker", e);
            }
        }
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing reader", e);
            }
        }
        if (spellDirectory != null) {
            spellDirectory.close();
        }
    }
}

From source file:resource.IndexFiles.java

License:Apache License

/**
 * Feeds the spell-check dictionary index at {@code DICTIONARY_PATH} with the
 * terms of the {@code contents} field of the index at {@code INDEX_PATH}.
 * <p>
 * All opened resources (both directories, the index reader and the spell
 * checker) are closed before the method returns, whether or not indexing
 * succeeded.
 *
 * @param analyzer analyzer used for the spell index writer configuration
 * @throws IOException if an index cannot be opened, read or written
 */
private static void createDictionary(Analyzer analyzer) throws IOException {
    Directory dictionaryDir = FSDirectory.open(new File(DICTIONARY_PATH));
    try {
        Directory indexDir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexReader reader = DirectoryReader.open(indexDir);
            try {
                Dictionary dictionary = new LuceneDictionary(reader, "contents");
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);

                SpellChecker spellChecker = new SpellChecker(dictionaryDir);
                try {
                    spellChecker.indexDictionary(dictionary, iwc, false);
                } finally {
                    spellChecker.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            indexDir.close();
        }
    } finally {
        dictionaryDir.close();
    }
}