com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java Source code

Java tutorial

Introduction

Here is the source code for com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

Source

/**
 * Constellio, Open Source Enterprise Search
 * Copyright (C) 2010 DocuLibre inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package com.doculibre.constellio.lucene;

import java.io.File;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.AbstractList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.doculibre.constellio.utils.ClasspathUtils;

@SuppressWarnings("serial")
public abstract class BaseLuceneIndexHelper<T> implements IndexHelper<T> {

    public static final String NULL_VALUE = "_null_";
    private static final int TAILLE_CHAINE_NON_FRAGMENTEE = 250;
    private static final int TAILLE_FRAGMENT = 80;
    private static final int NB_BEST_FRAGMENT = 5;
    private static final String FRAGMENT_SEP = "...";
    public static final int NUMBER_LENGTH = 9;

    private File indexDir;

    private String[] searchFields;

    private AnalyzerProvider analyzerProvider = new AnalyzerProvider();

    public BaseLuceneIndexHelper(String indexDirName) {
        File indexesDir = ClasspathUtils.getLuceneIndexesDir();
        this.indexDir = new File(indexesDir, indexDirName);

        createIndexIfNecessary();
        Field[] indexFields = createIndexFields();
        searchFields = createSearchFields(indexFields);
    }

    public AnalyzerProvider getAnalyzerProvider() {
        return analyzerProvider;
    }

    @Override
    public synchronized boolean isEmpty() {
        boolean empty;
        try {
            Directory directory = FSDirectory.open(indexDir);
            IndexReader indexReader = DirectoryReader.open(directory);
            empty = indexReader.numDocs() <= 1;
            indexReader.close();
            directory.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return empty;
    }

    @Override
    public synchronized boolean isIndexed(T object) {
        return getDocNum(object) != -1;
    }

    @Override
    public synchronized void add(T object) {
        try {
            Directory directory = FSDirectory.open(indexDir);
            Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
            IndexWriter indexWriter = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_44, analyzer));
            add(object, indexWriter);
            indexWriter.close();
            directory.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    protected synchronized void add(T object, IndexWriter indexWriter) {
        int docNum = getDocNum(object);
        if (docNum == -1) {
            Document doc = new Document();
            Field[] indexFields = createIndexFields();
            for (Field indexField : indexFields) {
                populateIndexField(object, indexField, doc);
                if (StringUtils.isEmpty(indexField.stringValue())) {
                    indexField.setStringValue(NULL_VALUE);
                }
                doc.add(indexField);
            }
            try {
                indexWriter.addDocument(doc);
            } catch (CorruptIndexException e) {
                throw new RuntimeException(e);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } else {
            throw new RuntimeException("Document dj existant! (docNum:" + docNum + ")");
        }
    }

    @Override
    public synchronized void update(T object) {
        delete(object);
        add(object);
    }

    @Override
    public synchronized void addOrUpdate(T object) {
        update(object);
    }

    @Override
    public synchronized void delete(T object) {
        int docNum = getDocNum(object);
        if (docNum != -1) {
            try {
                String uniqueIndexFieldName = getUniqueIndexFieldName();
                String uniqueIndexFieldValue = getUniqueIndexFieldValue(object);
                Directory directory = FSDirectory.open(indexDir);
                if (DirectoryReader.indexExists(directory)) {
                    Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
                    IndexWriter indexWriter = new IndexWriter(directory,
                            new IndexWriterConfig(Version.LUCENE_44, analyzer));
                    Term term = new Term(uniqueIndexFieldName, uniqueIndexFieldValue);
                    indexWriter.deleteDocuments(term);
                    indexWriter.close();
                }
                directory.close();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    @Override
    public synchronized void deleteAll() {
        try {
            Directory directory = FSDirectory.open(indexDir);
            Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
            IndexWriter indexWriter = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_44, analyzer));
            indexWriter.deleteAll();
            indexWriter.addDocument(new Document());
            //            indexWriter.optimize();
            indexWriter.close();
            directory.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public List<T> search(String freeTextQuery) {
        return search(freeTextQuery, true);
    }

    protected List<T> search(String freeTextQuery, boolean manageWildcards) {
        return search(freeTextQuery, manageWildcards, null, null);
    }

    @Override
    public List<T> search(String freeTextQuery, String sortField, Boolean sortAscending) {
        return search(freeTextQuery, true, sortField, sortAscending);
    }

    protected List<T> search(String freeTextQuery, boolean manageWildcards, String sortField,
            Boolean sortAscending) {
        String luceneTextQuery = freeTextQuery;
        LuceneSearchResultsProvider searchResultsProvider = new LuceneSearchResultsProvider(indexDir,
                luceneTextQuery, searchFields, sortField, sortAscending, analyzerProvider);
        return new ListSearchResults(searchResultsProvider);
    }

    @SuppressWarnings("unchecked")
    @Override
    public void release(List<T> searchResults) {
        if (searchResults instanceof BaseLuceneIndexHelper.ListSearchResults) {
            ListSearchResults listSearchResults = (ListSearchResults) searchResults;
            listSearchResults.searchResultsProvider.close();
        }
    }

    protected synchronized int getDocNum(T object) {
        int docNum;
        String uniqueIndexFieldName = getUniqueIndexFieldName();
        String uniqueIndexFieldValue = getUniqueIndexFieldValue(object);
        if (uniqueIndexFieldValue != null) {
            String query = uniqueIndexFieldName + ":" + uniqueIndexFieldValue;
            try {
                Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
                QueryParser multiFielsQP = new QueryParser(Version.LUCENE_44, uniqueIndexFieldName, analyzer);
                Query luceneQuery = multiFielsQP.parse(query);

                Directory directory = FSDirectory.open(indexDir);
                IndexReader reader = DirectoryReader.open(directory);
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                TopDocs topDocs = indexSearcher.search(luceneQuery, reader.maxDoc());
                if (topDocs.totalHits > 0) {
                    docNum = topDocs.scoreDocs[0].doc;
                } else {
                    docNum = -1;
                }
                //               indexSearcher.close();
                // TODO add finally
                reader.close();
                directory.close();
            } catch (ParseException e) {
                throw new RuntimeException(e);
            } catch (CorruptIndexException e) {
                throw new RuntimeException(e);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } else {
            docNum = -1;
        }
        return docNum;
    }

    public File getIndexDir() {
        return indexDir;
    }

    private void createIndexIfNecessary() {
        try {
            Directory directory = FSDirectory.open(indexDir);
            if (!DirectoryReader.indexExists(directory)) {
                Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
                IndexWriter indexWriter = new IndexWriter(directory,
                        new IndexWriterConfig(Version.LUCENE_44, analyzer));
                indexWriter.addDocument(new Document());
                indexWriter.close();
            }
            directory.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    protected String[] createSearchFields(Field[] indexFields) {
        String[] searchFields = new String[indexFields.length];
        for (int i = 0; i < searchFields.length; i++) {
            Field indexField = indexFields[i];
            searchFields[i] = indexField.name();
        }
        return searchFields;
    }

    @SuppressWarnings("unchecked")
    public String highlight(String strToHighlight, String fieldName, List<T> resultatsRecherche) {
        if (resultatsRecherche instanceof BaseLuceneIndexHelper.ListSearchResults) {
            ListSearchResults listResultatsRecherche = (ListSearchResults) resultatsRecherche;
            Query luceneQuery = listResultatsRecherche.searchResultsProvider.getLuceneQuery();
            return highlight(strToHighlight, fieldName, luceneQuery);
        } else {
            throw new RuntimeException(
                    "La liste passe en argument doit tre la mme qui a t retourne par la mthode search()");
        }
    }

    public String highlight(String strToHighlight, String fieldName, Query luceneQuery) {
        String highlightedText;
        Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
        try {
            Directory directory = FSDirectory.open(indexDir);
            IndexReader indexReader = DirectoryReader.open(directory);
            Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader);
            QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);
            SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>");
            Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);

            Fragmenter luceneFragmenter;
            // Si la chaine  highlighter est sup  250 carac
            if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
                // Cration de best fragments de 100 carac chaque
                luceneFragmenter = new SimpleFragmenter(TAILLE_FRAGMENT);
            } else {
                // Toute la chaine est highlight
                luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE);
            }
            luceneHighlighter.setTextFragmenter(luceneFragmenter);

            TokenStream luceneTokenStream = analyzer.tokenStream(fieldName, new StringReader(strToHighlight));
            String fragment = null;
            if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
                fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight, NB_BEST_FRAGMENT,
                        FRAGMENT_SEP);
            } else {
                fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight);
            }

            if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) {
                fragment = strToHighlight;
            }
            indexReader.close();
            directory.close();

            highlightedText = fragment;
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (InvalidTokenOffsetsException e) {
            throw new RuntimeException(e);
        }
        return highlightedText;
    }

    protected static Field[] createDefaultIndexFields(String[] indexFieldNames) {
        Field[] fields = new Field[indexFieldNames.length];
        for (int i = 0; i < indexFieldNames.length; i++) {
            String indexFieldName = indexFieldNames[i];
            Field field = createDefaultIndexField(indexFieldName);
            fields[i] = field;
        }
        return fields;
    }

    protected static Field createDefaultIndexField(String indexFieldName) {
        return new TextField(indexFieldName, "", Field.Store.YES);
    }

    public synchronized final void rebuild() {
        deleteAll();
        try {
            Directory directory = FSDirectory.open(indexDir);
            Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
            IndexWriter indexWriter = new IndexWriter(directory, config);
            List<T> objects = getAll();
            for (T object : objects) {
                add(object, indexWriter);
            }
            //            indexWriter.optimize();
            indexWriter.close();
            directory.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public static String adjustIfNumber(Object value) {
        String result;
        if (value != null) {
            String strValue = value.toString();
            try {
                Integer.parseInt(strValue);
                StringBuffer sb = new StringBuffer(strValue);
                for (int i = 0; i < NUMBER_LENGTH - strValue.length(); i++) {
                    sb.insert(0, "0");
                }
                result = sb.toString();
            } catch (Exception e) {
                result = strValue;
            }
        } else {
            result = null;
        }
        return result;
    }

    public static String adjustIfDate(String texteLibre) {
        String result;
        try {
            Date date = new SimpleDateFormat("yyyy-MM-dd").parse(texteLibre.trim());
            result = DateTools.dateToString(date, Resolution.DAY);
        } catch (Exception e) {
            result = texteLibre;
        }
        return result;
    }

    protected abstract List<T> getAll();

    protected abstract Field[] createIndexFields();

    protected abstract void populateIndexField(T object, Field indexField, Document doc);

    protected abstract String getUniqueIndexFieldName();

    protected abstract String getUniqueIndexFieldValue(T object);

    protected abstract T toObject(int docId, Document doc);

    public class ListSearchResults extends AbstractList<T> implements Serializable {

        private LuceneSearchResultsProvider searchResultsProvider;

        private ListSearchResults(LuceneSearchResultsProvider searchResultsProvider) {
            this.searchResultsProvider = searchResultsProvider;
        }

        public LuceneSearchResultsProvider getSearchResultsProvider() {
            return searchResultsProvider;
        }

        @Override
        public int size() {
            int size;
            if (searchResultsProvider.isSearchBegun()) {
                size = searchResultsProvider.topDocs().totalHits;
            } else {
                size = 0;
            }
            return size;
        }

        @Override
        public T get(int index) {
            TopDocs topDocs = searchResultsProvider.topDocs();
            int docId = topDocs.scoreDocs[index].doc;
            Document doc = searchResultsProvider.get(docId);
            return toObject(docId, doc);
        }

        public List<String> getMatchingFieldNames(int index) {
            TopDocs topDocs = searchResultsProvider.topDocs();
            int docId = topDocs.scoreDocs[index].doc;
            return searchResultsProvider.getMatchingFieldNames(docId);
        }

        private void writeObject(ObjectOutputStream out) throws IOException {
            searchResultsProvider.close();
            out.defaultWriteObject();
        }

        @Override
        protected void finalize() throws Throwable {
            searchResultsProvider.close();
            super.finalize();
        }

    }

    protected String getAndQuery(String query) {
        String[] queryWords = query.split(" ");
        if (queryWords.length > 1) {
            StringBuffer sb = new StringBuffer();
            for (String queryWord : queryWords) {
                if (queryWord.length() > 1) {
                    sb.append("+" + queryWord + " ");
                }
            }
            query = sb.toString();
        }
        return query;
    }

    public String analyze(String str) {
        try {
            return analyze(str, analyzerProvider.getAnalyzer(Locale.FRENCH));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public static String analyze(String str, Analyzer analyzer) throws IOException {
        if (analyzer == null) {
            return str;
        }
        StringBuilder norm = new StringBuilder();
        TokenStream tokens = analyzer.tokenStream("", new StringReader(str));
        tokens.reset();

        CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
        while (tokens.incrementToken()) {
            norm.append(termAtt.buffer(), 0, termAtt.length());
        }
        return norm.toString();
    }

    public static String escape(String text) {
        String result = text;
        // "\\" must be first
        String[] escapable = new String[] { "\\", "+", "-", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^",
                "\"", "~", /*"*",*/ "?", ":" };
        for (int i = 0; i < escapable.length; i++) {
            result = result.replace(escapable[i], "\\" + escapable[i]);
        }
        return result;
    }

    //    public static void main(String[] args) throws Exception {
    //        System.out.println(analyze("mt", new FrenchAnalyzer(Version.LUCENE_44)));
    //   }

}