// Java tutorial
/**
 * openkm, Open Document Management System (http://www.openkm.com)
 * Copyright (c) 2006-2013 Paco Avila & Josep Llort
 *
 * No bytes were intentionally harmed during the development of this application.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package com.ikon.module.db.stuff;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import javax.persistence.EntityManager;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.hibernate.ScrollableResults;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.jpa.FullTextEntityManager;
import org.hibernate.search.jpa.Search;
import org.hibernate.search.reader.ReaderProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ikon.dao.bean.NodeDocument;
import com.ikon.dao.bean.NodeDocumentVersion;

/**
 * Provides various
helper operations for working with Lucene indexes.
 * <p>
 * On startup, rebuilds the Hibernate Search full-text index from the database
 * when it is found empty, and maintains a Lucene {@link SpellChecker} index
 * (under {@code lucene_index/spellcheck}) derived from the full-text index.
 */
public class IndexHelper {
	private static Logger log = LoggerFactory.getLogger(IndexHelper.class);
	private EntityManager entityManager;

	/**
	 * Checks whether the full-text index contains any documents and, when it
	 * is empty, rebuilds it from the database and then (re)builds the spell
	 * checker index off the freshly built index.
	 *
	 * @throws RuntimeException wrapping any checked failure of the rebuild
	 */
	public void checkIndexOnStartup() {
		FullTextEntityManager ftEm = Search.getFullTextEntityManager(entityManager);
		SearchFactory searchFactory = ftEm.getSearchFactory();
		ReaderProvider readerProvider = searchFactory.getReaderProvider();
		IndexReader reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
		int maxDoc = 0;

		try {
			maxDoc = reader.maxDoc();
		} finally {
			// Always return the reader to the provider, even if maxDoc() fails.
			readerProvider.closeReader(reader);
		}

		if (maxDoc == 0) {
			log.warn("No objects indexed ... rebuilding Lucene search index from database ...");
			long start = System.currentTimeMillis();

			try {
				int docs = doRebuildIndex();
				long elapsed = System.currentTimeMillis() - start;
				log.info("Took {} (ms) to re-build the index containing {} documents.",
						String.valueOf(elapsed), String.valueOf(docs));
			} catch (Exception exc) {
				// Propagate as unchecked while preserving the original cause.
				if (exc instanceof RuntimeException) {
					throw (RuntimeException) exc;
				} else {
					throw new RuntimeException(exc);
				}
			}

			// Build the spell checker index off of the HS index.
			buildSpellCheckerIndex(searchFactory);
		}
	}

	/**
	 * Updates the spell checker index with the analyzed terms of the given
	 * document version's text. Index writes are serialized on this
	 * (application-scoped) instance, so concurrent callers queue briefly.
	 *
	 * @param nDocVer the added/updated document version; ignored when
	 *            {@code null} or when it carries no text
	 */
	public void updateSpellCheckerIndex(NodeDocumentVersion nDocVer) {
		log.info("Observed document version added/updated event for {} from thread {}",
				String.valueOf(nDocVer), Thread.currentThread().getName());
		String text = (nDocVer != null) ? nDocVer.getText() : null;

		if (text != null) {
			Dictionary dictionary = null;

			try {
				// Use the same lookup as checkIndexOnStartup() instead of a raw
				// cast of the EntityManager, which fails for wrapped managers.
				FullTextEntityManager ftEm = Search.getFullTextEntityManager(entityManager);
				SearchFactory searchFactory = ftEm.getSearchFactory();
				// NOTE(review): "wine_en" looks like a leftover analyzer name from
				// the Hibernate Search wine tutorial — confirm it is actually
				// configured; kept unchanged because it is a config lookup key.
				dictionary = new SetDictionary(text, searchFactory.getAnalyzer("wine_en"));
			} catch (IOException ioExc) {
				log.error("Failed to analyze dictionary text {} from document version {} to update spell checker",
						text, nDocVer.getUuid(), ioExc);
			}

			if (dictionary != null) {
				// Only allow one thread to update the index at a time; the
				// Dictionary is pre-computed, so the critical section is short.
				// This synchronized approach only works because this component
				// is application-scoped.
				synchronized (this) {
					Directory dir = null;

					try {
						dir = FSDirectory.open(new File("lucene_index/spellcheck"));
						SpellChecker spell = new SpellChecker(dir);

						try {
							spell.indexDictionary(dictionary);
						} finally {
							// Close even if indexDictionary() throws (leak fix).
							spell.close();
						}

						log.info("Successfully updated the spell checker index after document added/updated.");
					} catch (Exception exc) {
						log.error("Failed to update the spell checker index!", exc);
					} finally {
						if (dir != null) {
							try {
								dir.close();
							} catch (Exception ignored) {
								// Best-effort close; nothing useful to do here.
							}
						}
					}
				}
			}
		}
	}

	/**
	 * Used to feed updates to the spell checker index: a {@link Dictionary}
	 * backed by the distinct terms produced by analyzing one document's text.
	 */
	class SetDictionary implements Dictionary {
		private Set<String> wordSet;

		/**
		 * Tokenizes {@code words} with the given analyzer and collects the
		 * distinct terms.
		 *
		 * @param words the raw document text; may be {@code null} (empty dictionary)
		 * @param analyzer analyzer used to produce the terms
		 * @throws IOException if token stream iteration fails
		 */
		SetDictionary(String words, Analyzer analyzer) throws IOException {
			wordSet = new HashSet<String>();

			if (words != null) {
				// The original tokenStream.next(Token) loop was commented out
				// (that API was removed from Lucene), leaving the word set
				// permanently empty and making every spell checker update a
				// silent no-op. Use the attribute-based iteration instead.
				// NOTE(review): assumes Lucene 3.1+ (CharTermAttribute) — confirm
				// against the project's Lucene version.
				TokenStream tokenStream = analyzer.tokenStream(NodeDocument.TEXT_FIELD, new StringReader(words));
				CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
				tokenStream.reset();

				try {
					while (tokenStream.incrementToken()) {
						String term = termAtt.toString();

						if (term.length() > 0) {
							wordSet.add(term);
						}
					}

					tokenStream.end();
				} finally {
					tokenStream.close();
				}
			}
		}

		@SuppressWarnings({ "unchecked", "rawtypes" })
		public Iterator getWordsIterator() {
			return wordSet.iterator();
		}
	}

	/**
	 * Builds the spell checker index from scratch out of the full-text index
	 * of {@link NodeDocumentVersion}. Failures are logged, not propagated.
	 *
	 * @param searchFactory factory giving access to the full-text index reader
	 */
	protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
		IndexReader reader = null;
		Directory dir = null;
		long start = System.currentTimeMillis();
		File spellCheckIndexDir = new File("lucene_index/spellcheck");
		log.info("Building SpellChecker index in {}", spellCheckIndexDir.getAbsolutePath());
		ReaderProvider readerProvider = searchFactory.getReaderProvider();

		try {
			reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
			dir = FSDirectory.open(spellCheckIndexDir);
			SpellChecker spell = new SpellChecker(dir);

			try {
				spell.clearIndex();
				spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
			} finally {
				// Close even if indexing throws (leak fix).
				spell.close();
			}

			long elapsed = System.currentTimeMillis() - start;
			log.info("Took {} (ms) to build SpellChecker index in {}",
					String.valueOf(elapsed), spellCheckIndexDir.getAbsolutePath());
		} catch (Exception exc) {
			log.error("Failed to build spell checker index!", exc);
		} finally {
			if (dir != null) {
				try {
					dir.close();
				} catch (Exception ignored) {
					// Best-effort close; nothing useful to do here.
				}
			}

			if (reader != null) {
				readerProvider.closeReader(reader);
			}
		}
	}

	/**
	 * Purges and rebuilds the {@link NodeDocumentVersion} full-text index from
	 * the database, flushing and clearing the persistence context in batches
	 * to bound memory use.
	 *
	 * @return the number of objects indexed
	 * @throws Exception if scrolling or indexing fails
	 */
	protected int doRebuildIndex() throws Exception {
		FullTextSession fullTextSession = (FullTextSession) entityManager.getDelegate();
		// MANUAL flush + IGNORE cache: we only read entities to feed the index.
		fullTextSession.setFlushMode(org.hibernate.FlushMode.MANUAL);
		fullTextSession.setCacheMode(org.hibernate.CacheMode.IGNORE);
		fullTextSession.purgeAll(NodeDocumentVersion.class);
		fullTextSession.getSearchFactory().optimize(NodeDocumentVersion.class);

		String query = "select ndv from NodeDocumentVersion ndv";
		ScrollableResults cursor = fullTextSession.createQuery(query).scroll();
		int count;

		try {
			cursor.last();
			count = cursor.getRowNumber() + 1;
			log.warn("Re-building document version index for " + count + " objects.");

			if (count > 0) {
				int batchSize = 300;
				cursor.first(); // Reset to first result row
				int i = 0;

				while (true) {
					fullTextSession.index(cursor.get(0));

					if (++i % batchSize == 0) {
						fullTextSession.flushToIndexes();
						fullTextSession.clear(); // Clear persistence context for each batch
						log.info("Flushed index update {} from thread {}",
								String.valueOf(i), Thread.currentThread().getName());
					}

					if (cursor.isLast()) {
						break;
					}

					cursor.next();
				}
			}
		} finally {
			// Close the cursor even if indexing throws (leak fix).
			cursor.close();
		}

		fullTextSession.flushToIndexes();
		fullTextSession.clear(); // Clear persistence context for the final partial batch
		fullTextSession.getSearchFactory().optimize(NodeDocumentVersion.class);
		return count;
	}
}