Example usage for the org.apache.lucene.search.spell.LuceneDictionary constructor

Introduction

On this page you can find example usage of the org.apache.lucene.search.spell.LuceneDictionary constructor.

Prototype

public LuceneDictionary(IndexReader reader, String field)

Source Link

Document

Creates a new Dictionary, pulling source terms from the specified field in the provided reader

Usage

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper.java

License:Apache License

/**
 * Rebuilds the suggester from the terms of the {@code FieldNames.SUGGEST} field.
 *
 * <p>Nothing is built when the reader reports no documents for that field. Runtime
 * exceptions are logged at debug level and not propagated.
 *
 * @param directory directory handed to {@code getLookup}
 * @param analyzer  analyzer handed to {@code getLookup}
 * @param reader    reader supplying the suggest terms
 * @throws IOException if reading the index or building the lookup fails
 */
public static void updateSuggester(Directory directory, Analyzer analyzer, IndexReader reader)
        throws IOException {
    File scratchDir = null;
    try {
        // Per the original author's note: the analyzing infix suggester derives its storage
        // location from a file path and, while building, also uses a sibling temporary
        // location (path + ".tmp"). A throw-away directory with a never-created child path
        // is therefore used as the marker location; deleting the directory afterwards
        // removes whatever the suggester left behind during the build.
        scratchDir = Files.createTempDir();
        File markerPath = new File(scratchDir, "non-existing-sub-child");

        boolean hasSuggestDocs = reader.getDocCount(FieldNames.SUGGEST) > 0;
        if (hasSuggestDocs) {
            Dictionary suggestTerms = new LuceneDictionary(reader, FieldNames.SUGGEST);
            getLookup(directory, analyzer, markerPath).build(suggestTerms);
        }
    } catch (RuntimeException e) {
        log.debug("could not update the suggester", e);
    } finally {
        // Always remove the scratch directory, whether or not the build succeeded.
        if (scratchDir != null) {
            boolean removed = FileUtils.deleteQuietly(scratchDir);
            if (!removed) {
                log.error("Cleanup failed for temp dir {}", scratchDir.getAbsolutePath());
            }
        }
    }
}

From source file:org.capelin.transaction.dao.RecordDao.java

License:GNU General Public License

/**
 * Spell check the term from the field in index, return similar terms. If
 * the keyword is not tokenized, this will give the full name and function
 * like browse.
 *
 * <p>The spell checker is fed from a {@link LuceneDictionary} over {@code field} and
 * is asked for {@code getPageSize()} suggestions with a very low accuracy threshold
 * ({@code 0.0001f}). The reader obtained from the reader provider is always handed
 * back to it, even on failure.
 *
 * @param field index field whose terms feed the spell checker and restrict the suggestions
 * @param term  the word to find similar terms for
 * @return the suggested terms, or {@code null} if an {@link IOException} occurred
 */
public String[] spellcheck(String field, String term) {
    SearchFactory searchFactory = Search.getFullTextSession(getSession()).getSearchFactory();
    DirectoryProvider<?> recordProvider = searchFactory.getDirectoryProviders(recordClass)[0];
    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    IndexReader reader = readerProvider.openReader(recordProvider);
    String[] similars = null;
    try {
        SpellChecker spellchecker = new SpellChecker(recordProvider.getDirectory());
        spellchecker.indexDictionary(new LuceneDictionary(reader, field));
        spellchecker.setAccuracy(0.0001f);
        similars = spellchecker.suggestSimilar(term, getPageSize(), reader, field, true);
    } catch (IOException e) {
        // Pass the exception as the throwable too, so the stack trace is not lost.
        log.error("Index not found: " + e, e);
    } finally {
        readerProvider.closeReader(reader);
    }
    return similars;
}

From source file:org.codesearch.searcher.server.util.STAlternativeSuggestor.java

License:Open Source License

/**
 * Creates the spell index for the SpellChecker
 * @param field/*w  w  w . ja va2 s.c  o  m*/
 * @param originalIndexDirectory
 * @param spellIndexDirectory
 * @throws IOException
 */
public void createSpellIndex(String field, Directory originalIndexDirectory, Directory spellIndexDirectory)
        throws IOException {
    IndexReader indexReader = null;
    try {
        indexReader = IndexReader.open(originalIndexDirectory);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.indexDictionary(dictionary);
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }
}

From source file:org.codesearch.searcher.server.util.STAutocompleter.java

License:Open Source License

/**
 * Rebuilds the autocomplete index from the terms of one field of a source index.
 *
 * <p>Every term of {@code fieldToAutocomplete} is written to {@code autoCompleteDirectory}
 * as one document holding the word in both {@code SOURCE_WORD_FIELD} (not analyzed) and
 * {@code GRAMMED_WORDS_FIELD} (analyzed). The writer is optimized and closed and the
 * source reader is closed, even when indexing fails; {@code setupReader()} is only
 * called after a successful rebuild.
 *
 * @param sourceDirectory     directory of the index the words are read from
 * @param fieldToAutocomplete field of the source index whose terms are indexed
 * @throws CorruptIndexException if an index is corrupt
 * @throws IOException           if reading the source index or writing the autocomplete index fails
 */
public void setupIndex(Directory sourceDirectory, String fieldToAutocomplete)
        throws CorruptIndexException, IOException {
    IndexReader sourceReader = IndexReader.open(sourceDirectory);
    try {
        LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);
        IndexWriter writer = new IndexWriter(autoCompleteDirectory, new STAutocompleteLuceneAnalyzer(),
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            writer.setMergeFactor(300);
            writer.setMaxBufferedDocs(150);

            // Collect the distinct words. The document frequency is stored as the map
            // value, but only the keys are used below.
            // The original skipped words with length() < 0, which can never be true, so
            // that unreachable check has been removed; every word is kept, as before.
            Map<String, Integer> wordsMap = new HashMap<String, Integer>();
            Iterator<String> iter = dict.getWordsIterator();
            while (iter.hasNext()) {
                String word = iter.next();
                wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word)));
            }

            LOG.info("SetupIndex: " + GRAMMED_WORDS_FIELD);
            for (String word : wordsMap.keySet()) {
                Document doc = new Document();
                doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED));
                LOG.info("source:" + word);
                doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED));
                LOG.info("grammed:" + word);
                writer.addDocument(doc);
            }
            writer.optimize();
        } finally {
            // Always close the writer so it is not left open when indexing fails.
            writer.close();
        }
    } finally {
        sourceReader.close();
    }
    setupReader();
}

From source file:org.olat.search.service.spell.SearchSpellChecker.java

License:Apache License

/**
 * Creates a new spell-check index based on the search index.
 *
 * <p>Does nothing when spell checking is disabled. Otherwise one partial spell index is
 * built per document field (content, title, description, author), the parts are merged
 * into the common spell index under {@code spellDictionaryPath}, and {@code spellChecker}
 * is replaced by a checker over the merged index with accuracy {@code 0.7f}.
 * I/O failures are logged as warnings and not propagated.
 */
public void createSpellIndex() {
    if (!isSpellCheckEnabled) {
        return;
    }
    IndexReader searchIndexReader = null;
    try {
        log.info("Start generating Spell-Index...");
        // The start time is only needed for the debug timing message below.
        final long startedAt = log.isDebug() ? System.currentTimeMillis() : 0;

        searchIndexReader = IndexReader.open(FSDirectory.open(new File(indexPath)));
        final File mergedSpellIndexFile = new File(spellDictionaryPath);

        // Build the four partial spell indexes, one per document field.
        final Directory contentPart = indexFieldDictionary(searchIndexReader,
                new File(spellDictionaryPath + CONTENT_PATH), AbstractOlatDocument.CONTENT_FIELD_NAME);
        final Directory titlePart = indexFieldDictionary(searchIndexReader,
                new File(spellDictionaryPath + TITLE_PATH), AbstractOlatDocument.TITLE_FIELD_NAME);
        final Directory descriptionPart = indexFieldDictionary(searchIndexReader,
                new File(spellDictionaryPath + DESCRIPTION_PATH), AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        final Directory authorPart = indexFieldDictionary(searchIndexReader,
                new File(spellDictionaryPath + AUTHOR_PATH), AbstractOlatDocument.AUTHOR_FIELD_NAME);

        // Merge all partial spell indexes into the one common spell index.
        final Directory mergedDirectory = FSDirectory.open(mergedSpellIndexFile);
        final IndexWriter mergeWriter = new IndexWriter(mergedDirectory,
                new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        mergeWriter.addIndexesNoOptimize(
                new Directory[] { contentPart, titlePart, descriptionPart, authorPart });
        mergeWriter.optimize();
        mergeWriter.close();

        spellChecker = new SpellChecker(mergedDirectory);
        spellChecker.setAccuracy(0.7f);
        if (log.isDebug()) {
            log.debug("SpellIndex created in " + (System.currentTimeMillis() - startedAt) + "ms");
        }
        log.info("New generated Spell-Index ready to use.");
    } catch (final IOException ioEx) {
        log.warn("Can not create SpellIndex", ioEx);
    } finally {
        if (searchIndexReader != null) {
            try {
                searchIndexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}

/**
 * Builds one partial spell index from the terms of a single field.
 *
 * @param reader    reader on the search index
 * @param partFile  location of the partial spell index
 * @param fieldName field of the search index whose terms are indexed
 * @return the directory holding the partial spell index
 * @throws IOException if the directory cannot be opened or indexing fails
 */
private Directory indexFieldDictionary(final IndexReader reader, final File partFile, final String fieldName)
        throws IOException {
    final Directory partDirectory = FSDirectory.open(partFile);
    final SpellChecker partChecker = new SpellChecker(partDirectory);
    final Dictionary partDictionary = new LuceneDictionary(reader, fieldName);
    partChecker.indexDictionary(partDictionary);
    return partDirectory;
}

From source file:org.opengrok.suggest.SuggesterProjectData.java

License:Open Source License

/**
 * Builds a WFST completion lookup from the terms of {@code field} and records the
 * average term length for that field in {@code averageLengths}.
 *
 * @param indexReader reader supplying the terms
 * @param field       field whose terms are fed into the lookup
 * @return the built lookup
 * @throws IOException if reading the terms or building the lookup fails
 */
private WFSTCompletionLookup build(final IndexReader indexReader, final String field) throws IOException {
    WFSTInputIterator termInput = new WFSTInputIterator(
            new LuceneDictionary(indexReader, field).getEntryIterator(),
            indexReader,
            field,
            getSearchCounts(field));

    WFSTCompletionLookup wfst = createWFST();
    wfst.build(termInput);

    // Accumulated term length divided by the number of entries in the lookup; the
    // accumulator is read only after the build has consumed the iterator.
    averageLengths.put(field, (double) termInput.termLengthAccumulator / wfst.getCount());

    return wfst;
}

From source file:org.sakaiproject.search.journal.impl.JournaledFSIndexStorage.java

License:Educational Community License

/**
 * Rebuilds the "did you mean" spell index from the contents field of the given reader.
 *
 * <p>Does nothing unless the {@code search.experimental.didyoumean} configuration flag
 * is enabled. The spell index lives in {@code <localIndexBase>/spellindex}; it is
 * cleared and then refilled from {@code SearchService.FIELD_CONTENTS}. I/O failures
 * are logged and not propagated.
 *
 * @param indexReader reader on the main index; when {@code null} the spell index is
 *                    left untouched (the {@code spellIndexDirectory} field is still assigned)
 */
private void createSpellIndex(IndexReader indexReader) {
    if (!serverConfigurationService.getBoolean("search.experimental.didyoumean", false)) {
        return;
    }

    log.info("create Spell Index");

    // Primitive long: no need to box a timestamp that is only used for arithmetic.
    long start = System.currentTimeMillis();
    try {
        log.info("main index is in: " + journalSettings.getSearchIndexDirectory());
        log.info("local base is: " + journalSettings.getLocalIndexBase());
        spellIndexDirectory = new NIOFSDirectory(new File(journalSettings.getLocalIndexBase() + "/spellindex"));
        if (indexReader == null) {
            log.info("unable to get index reader aborting spellindex creation");
            return;
        }
        Dictionary dictionary = new LuceneDictionary(indexReader, SearchService.FIELD_CONTENTS);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.clearIndex();
        spellChecker.indexDictionary(dictionary);
        log.info("New Spell dictionary constructed in " + (System.currentTimeMillis() - start));
    } catch (IOException e) {
        // Report through the logger instead of printStackTrace() so the failure ends up
        // in the application log together with its stack trace.
        log.error("Failed to create spell index", e);
    }

    log.info("All done in " + (System.currentTimeMillis() - start));
}

From source file:org.silverpeas.search.indexEngine.model.DidYouMeanIndexer.java

License:Open Source License

/**
 * creates or updates a spelling index. The spelling index is created or updated from an existing
 * index. The spelling index is used to suggest words when an user executes a query that returns
 * unsatisfactory results. if a spelling index already exists, only the new words contained in the
 * index source will be added. otherwise a new index will be created.
 *
 * <p>Invalid (undefined) arguments and I/O failures are reported through {@code SilverTrace}
 * and not propagated. The spell checker, the reader and both directories opened here are
 * released before returning, whether or not indexing succeeded.
 *
 * @param field name of the field of the index source that will be used to feed the spelling index
 * @param originalIndexDirectory represents the source index path
 * @param spellIndexDirectory represents the spelling index path
 */
public static void createSpellIndex(String field, String originalIndexDirectory, String spellIndexDirectory) {
    // stop the process if method parameters is null or empty
    if (!StringUtil.isDefined(field) || !StringUtil.isDefined(originalIndexDirectory)
            || !StringUtil.isDefined(spellIndexDirectory)) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INVALID_ARG");
        return;
    }
    // resources that must be released in the finally block
    IndexReader indexReader = null;
    SpellChecker spellChecker = null;
    FSDirectory spellDirectory = null;
    FSDirectory sourceDirectory = null;

    try {
        // open the spelling index directory
        spellDirectory = FSDirectory.open(new File(spellIndexDirectory));
        // open original index
        sourceDirectory = FSDirectory.open(new File(originalIndexDirectory));
        indexReader = IndexReader.open(sourceDirectory);
        // create a Lucene dictionary with the original index
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        // index the dictionary into the spelling index
        spellChecker = new SpellChecker(spellDirectory);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36));
        spellChecker.indexDictionary(dictionary, config, true);
    } catch (CorruptIndexException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INDEX_FAILED", e);
    } catch (IOException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_LOAD_IO_EXCEPTION", e);
    } finally {
        // Close the spell checker exactly once, also when indexing failed; a failure to
        // close is still reported, as it was when close() was called inside the try block.
        if (spellChecker != null) {
            try {
                spellChecker.close();
            } catch (IOException e) {
                SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_LOAD_IO_EXCEPTION",
                        e);
            }
        }
        IOUtils.closeQuietly(indexReader);
        // The directories are opened by this method, so they are released here as well.
        IOUtils.closeQuietly(spellDirectory);
        IOUtils.closeQuietly(sourceDirectory);
    }

}

From source file:org.watermint.sourcecolon.org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Generate a spelling suggestion for the definitions stored in defs.
 *
 * <p>Opens a reader on {@code indexDirectory} and indexes the terms of the {@code "defs"}
 * field into a spell checker over {@code spellDirectory}. I/O failures are logged and not
 * propagated. The spell checker, the reader and {@code spellDirectory} are closed before
 * returning.
 */
public void createSpellingSuggestions() {
    IndexReader indexReader = null;
    SpellChecker checker = null;

    try {
        log.info("Generating spelling suggestion index ... ");
        indexReader = IndexReader.open(indexDirectory);
        checker = new SpellChecker(spellDirectory);
        //TODO below seems only to index "defs" , possible bug ?
        checker.indexDictionary(new LuceneDictionary(indexReader, "defs"),
                new IndexWriterConfig(Version.LUCENE_36, null), true);
        log.info("done");
    } catch (IOException e) {
        // With a Throwable argument the message is not treated as a format string, so the
        // former "{0}" placeholder was emitted literally; the exception is attached instead.
        log.log(Level.SEVERE, "ERROR: Generating spelling", e);
    } finally {
        // Close the spell checker before the directory it works on is closed.
        if (checker != null) {
            try {
                checker.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing spell checker", e);
            }
        }
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing reader", e);
            }
        }
        if (spellDirectory != null) {
            spellDirectory.close();
        }
    }
}

From source file:resource.IndexFiles.java

License:Apache License

/**
 * Builds the spell-check dictionary index from the {@code "contents"} field of the main index.
 *
 * <p>Reads the index at {@code INDEX_PATH} and writes the spell index to
 * {@code DICTIONARY_PATH}. Every resource opened here (both directories, the reader and
 * the spell checker) is closed before returning, also when indexing fails.
 *
 * @param analyzer analyzer used for the spell index writer configuration
 * @throws IOException if the main index cannot be read or the spell index cannot be written
 */
private static void createDictionary(Analyzer analyzer) throws IOException {
    Directory dictionaryDir = FSDirectory.open(new File(DICTIONARY_PATH));
    try {
        Directory indexDir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexReader reader = DirectoryReader.open(indexDir);
            try {
                Dictionary dictionary = new LuceneDictionary(reader, "contents");
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);

                SpellChecker spellChecker = new SpellChecker(dictionaryDir);
                try {
                    spellChecker.indexDictionary(dictionary, iwc, false);
                } finally {
                    spellChecker.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            indexDir.close();
        }
    } finally {
        dictionaryDir.close();
    }
}