org.d2.plugins.lucene.LuceneIndexer.java Source code

Java tutorial

Introduction

Here is the source code for org.d2.plugins.lucene.LuceneIndexer.java

Source

/*******************************************************************************
 * Copyright 2010 Nathan Kopp
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package org.d2.plugins.lucene;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Version;
import org.d2.Bucket;
import org.d2.index.DocBuilderAbstract;
import org.d2.pluggable.Indexer;
import org.d2.query.D2NumericRangeTerm;
import org.d2.query.D2Query;
import org.d2.query.D2QueryNode;
import org.d2.query.D2RangeTerm;
import org.d2.query.D2Term;
import org.nkts.util.Util;

public class LuceneIndexer implements Indexer {
    private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    private File indexDir;

    private DocBuilderAbstract docBuilder;

    LuceneManager manager;

    private Boolean stopThread = false;

    private Directory index;

    public LuceneIndexer(String rootFolder, Bucket bucket)
            throws CorruptIndexException, LockObtainFailedException, IOException {
        indexDir = new File(rootFolder + "/" + bucket.getName() + "/index");
        indexDir.mkdirs();

        //index = FSDirectory.open(indexDir);
        Directory fsDir = FSDirectory.open(indexDir);
        index = new NRTCachingDirectory(fsDir, 5.0, 60.0);

        manager = new LuceneManager(index);
        this.docBuilder = createDocBuilder();
    }

    private DocBuilderAbstract createDocBuilder() {
        return new LuceneDocBuilder();
    }

    public synchronized void indexObject(Object obj) {
        try {
            Document d = (Document) docBuilder.toDocument(obj);
            manager.updateDocument(obj, d);

        } catch (CorruptIndexException e) {
            throw Util.wrap(e);
        } catch (IOException e) {
            throw Util.wrap(e);
        } finally {
            try {
                //                closeWriter();
            } catch (Exception e) {
                throw Util.wrap(e);
            }
        }
    }

    public List<String> findIdByQuery(D2Query query) {
        LuceneReaderAndSearcher searcher = null;
        try {
            searcher = manager.getSearcher();
            List<String> pList = new ArrayList<String>();

            TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
            searcher.getSearcher().search(makeLuceneQuery(query), collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            for (int i = 0; i < hits.length; i++) {
                Document doc = searcher.getSearcher().doc(hits[i].doc);
                pList.add(doc.get("id"));
            }

            return pList;
        } catch (IOException e) {
            throw Util.wrap(e);
        } finally {
            manager.releaseSearcher(searcher);
        }
    }

    // ======================================================================
    // ======================================================================

    /**
     * TODO stop the background thread and lock everything (including all reads)
     */
    public void rebuildIndex(Collection<Object> objList) {
        try {
            manager.close();
            manager = new LuceneManager(index);

            //writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            if (index instanceof NRTCachingDirectory)
                config.setMergeScheduler(((NRTCachingDirectory) index).getMergeScheduler());
            IndexWriter writer = new IndexWriter(manager.getIndex(), config);
            for (Object obj : objList) {
                Document d = (Document) docBuilder.toDocument(obj);
                writer.addDocument(d);
            }

            //writer.optimize();
            writer.commit();

            manager.close();
            manager = new LuceneManager(index);
        } catch (CorruptIndexException e) {
            throw Util.wrap(e);
        } catch (IOException e) {
            throw Util.wrap(e);
        }
    }

    // ======================================================================
    // ======================================================================

    protected Query makeLuceneQuery(D2Query query) {
        try {
            BooleanQuery lquery = new BooleanQuery();
            for (D2QueryNode node : query.getNodes()) {
                if (node instanceof D2Term) {
                    D2Term term = (D2Term) node;
                    switch (term.getType()) {
                    case EXACT:
                        lquery.add(new TermQuery(new Term(term.getField(), handleBlanks(term.getValue()))),
                                getLuceneOccur(node));
                        break;
                    case FUZZY:
                        lquery.add(new FuzzyQuery(new Term(term.getField(), term.getValue())),
                                getLuceneOccur(node));
                        break;
                    case PREFIX:
                        lquery.add(new PrefixQuery(new Term(term.getField(), term.getValue())),
                                getLuceneOccur(node));
                        break;
                    case WILDCARD:
                        lquery.add(new WildcardQuery(new Term(term.getField(), term.getValue())),
                                getLuceneOccur(node));
                        break;
                    case PARSED:
                        QueryParser parser = new QueryParser(Version.LUCENE_36, term.getField(), analyzer);
                        Query luceneChild = parser.parse(term.getValue());
                        lquery.add(luceneChild, getLuceneOccur(node));
                        break;
                    default:
                        throw new RuntimeException("query type not found: " + term.getType());
                    }
                } else if (node instanceof D2RangeTerm) {
                    D2RangeTerm term = (D2RangeTerm) node;
                    lquery.add(new TermRangeQuery(term.getField(), term.getValue1(), term.getValue2(),
                            term.isInclusive(), term.isInclusive()), getLuceneOccur(node));
                } else if (node instanceof D2NumericRangeTerm) {
                    D2NumericRangeTerm term = (D2NumericRangeTerm) node;
                    if (Double.class.isAssignableFrom(term.getType()))
                        lquery.add(
                                NumericRangeQuery.newDoubleRange(term.getField(), (Double) term.getValue1(),
                                        (Double) term.getValue2(), term.isInclusive(), term.isInclusive()),
                                getLuceneOccur(node));
                    else if (Long.class.isAssignableFrom(term.getType()))
                        lquery.add(
                                NumericRangeQuery.newLongRange(term.getField(), (Long) term.getValue1(),
                                        (Long) term.getValue2(), term.isInclusive(), term.isInclusive()),
                                getLuceneOccur(node));
                    else if (Float.class.isAssignableFrom(term.getType()))
                        lquery.add(
                                NumericRangeQuery.newFloatRange(term.getField(), (Float) term.getValue1(),
                                        (Float) term.getValue2(), term.isInclusive(), term.isInclusive()),
                                getLuceneOccur(node));
                    else if (Integer.class.isAssignableFrom(term.getType()))
                        lquery.add(
                                NumericRangeQuery.newIntRange(term.getField(), (Integer) term.getValue1(),
                                        (Integer) term.getValue2(), term.isInclusive(), term.isInclusive()),
                                getLuceneOccur(node));
                } else if (node instanceof D2Query) {
                    D2Query child = (D2Query) node;
                    Query luceneChild = makeLuceneQuery(child);
                    lquery.add(luceneChild, getLuceneOccur(child));
                }
            }

            //            System.out.println("LUCENE QUERY: "+lquery.toString());

            return lquery;
        } catch (ParseException e) {
            throw Util.wrap(e);
        }
    }

    private static String handleBlanks(String input) {
        if (Util.isBlank(input))
            return LuceneDocBuilder.MAGIC_BLANK_VALUE;
        else
            return input;
    }

    private static Occur getLuceneOccur(D2QueryNode node) {
        switch (node.getOccurs()) {
        case MUST:
            return BooleanClause.Occur.MUST;
        case SHOULD:
            return BooleanClause.Occur.SHOULD;
        case MUST_NOT:
            return BooleanClause.Occur.MUST_NOT;
        }
        throw new RuntimeException("bad occurs: " + node.getOccurs());
    }

    // ======================================================================
    // ======================================================================

    public void close() {
        try {
            stopThread = true;
            manager.close();
            synchronized (stopThread) {
                stopThread.notifyAll();
            }
        } catch (Exception e) {
            Util.wrap(e);
        }
    }

    public DocBuilderAbstract getDocBuilder() {
        return docBuilder;
    }

    @Override
    public void deleteDocument(String id) {
        try {
            manager.getWriter().deleteDocuments(new Term("id", id));
        } catch (Throwable t) {
            throw Util.wrap(t);
        }

    }

    @Override
    public void resetLocks() {

        try {
            if ((new File(indexDir + "/write.lock").delete())) {
                System.out.println(indexDir + "/write.lock deleted");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}