org.meresco.lucene.Lucene.java Source code

Here is the source code for org.meresco.lucene.Lucene.java
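
The class below is a facade over an IndexWriter, a taxonomy writer for facets, and a SearcherTaxonomyManager. As a hedged sketch of how it might be driven (the class name LuceneUsageExample, the default LuceneSettings constructor, the "title" field, and the state directory are illustrative assumptions, not taken from this file):

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.meresco.lucene.Lucene;
import org.meresco.lucene.LuceneResponse;
import org.meresco.lucene.LuceneSettings;

public class LuceneUsageExample {
    public static void main(String[] args) throws Throwable {
        // Assumes LuceneSettings provides usable defaults when constructed directly.
        Lucene lucene = new Lucene("example", new File("/tmp/lucene-state"), new LuceneSettings());

        Document doc = new Document();
        doc.add(new TextField("title", "a first record", Store.YES));
        lucene.addDocument("record:1", doc); // upserts under the reserved __id__ field
        lucene.commit();

        LuceneResponse response = lucene.executeQuery(new TermQuery(new Term("title", "first")));
        System.out.println(response.hits.size() + " hit(s)");
        lucene.close();
    }
}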

Source

/* begin license *
 *
 * "Meresco Lucene" is a set of components and tools to integrate Lucene (based on PyLucene) into Meresco
 *
 * Copyright (C) 2015 Koninklijke Bibliotheek (KB) http://www.kb.nl
 * Copyright (C) 2015-2016 Seecr (Seek You Too B.V.) http://seecr.nl
 *
 * This file is part of "Meresco Lucene"
 *
 * "Meresco Lucene" is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * "Meresco Lucene" is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with "Meresco Lucene"; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * end license */

package org.meresco.lucene;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;

import org.apache.commons.collections4.map.LRUMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.CachedOrdinalsReader;
import org.apache.lucene.facet.taxonomy.DocValuesOrdinalsReader;
import org.apache.lucene.facet.taxonomy.OrdinalsReader;
import org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager;
import org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager.SearcherAndTaxonomy;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.queries.ChainedFilter;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ReferenceManager.RefreshListener;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.Version;
import org.meresco.lucene.LuceneResponse.ClusterHit;
import org.meresco.lucene.LuceneResponse.DedupHit;
import org.meresco.lucene.LuceneResponse.DrilldownData;
import org.meresco.lucene.LuceneResponse.GroupingHit;
import org.meresco.lucene.LuceneResponse.Hit;
import org.meresco.lucene.QueryConverter.FacetRequest;
import org.meresco.lucene.search.DeDupFilterSuperCollector;
import org.meresco.lucene.search.FacetSuperCollector;
import org.meresco.lucene.search.GroupSuperCollector;
import org.meresco.lucene.search.MerescoCluster;
import org.meresco.lucene.search.MerescoCluster.DocScore;
import org.meresco.lucene.search.MerescoClusterer;
import org.meresco.lucene.search.MultiSuperCollector;
import org.meresco.lucene.search.SuperCollector;
import org.meresco.lucene.search.SuperIndexSearcher;
import org.meresco.lucene.search.TopDocSuperCollector;
import org.meresco.lucene.search.TopFieldSuperCollector;
import org.meresco.lucene.search.TopScoreDocSuperCollector;
import org.meresco.lucene.search.join.AggregateScoreSuperCollector;
import org.meresco.lucene.search.join.KeySuperCollector;
import org.meresco.lucene.search.join.ScoreSuperCollector;

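/**
 * Facade around a Lucene index: one IndexWriter, one DirectoryTaxonomyWriter
 * for facets, and a SearcherTaxonomyManager handing out searchers. Documents
 * are keyed by the reserved __id__ field, searches run through the various
 * SuperCollectors, and commits are batched by count or timeout.
 */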
public class Lucene {
    @SuppressWarnings("serial")
    public static class UninitializedException extends Exception {
    }

    public static final String ID_FIELD = "__id__";
    private int commitCount = 0;
    private Timer commitTimer;
    public String name;
    private File stateDir;
    private Map<String, CachedOrdinalsReader> cachedOrdinalsReader = new HashMap<String, CachedOrdinalsReader>();
    private DirectSpellChecker spellChecker = new DirectSpellChecker();
    LuceneData data = new LuceneData();

    public Lucene(String name, File stateDir) {
        this.name = name;
        this.stateDir = stateDir;
    }

    public Lucene(File stateDir, LuceneSettings settings) throws Exception {
        this(null, stateDir, settings);
    }

    public Lucene(String name, File stateDir, LuceneSettings settings) throws Exception {
        this.name = name;
        this.stateDir = stateDir;
        initSettings(settings);
    }

    public void initSettings(LuceneSettings settings) throws Exception {
        data.initSettings(stateDir, settings);
    }

    public LuceneSettings getSettings() throws Exception {
        return this.data.getSettings();
    }

    public boolean hasSettings() {
        return this.data.hasSettings();
    }

    public synchronized void close() throws IOException {
        if (commitTimer != null)
            commitTimer.cancel();
        this.data.close();
    }

    public void addDocument(Document doc) throws Exception {
        doc = data.getFacetsConfig().build(data.getTaxoWriter(), doc);
        data.getIndexWriter().addDocument(doc);
        maybeCommitAfterUpdate();
    }

    public void addDocument(String identifier, Document doc) throws Exception {
        doc.add(new StringField(ID_FIELD, identifier, Store.YES));
        doc = data.getFacetsConfig().build(data.getTaxoWriter(), doc);
        data.getIndexWriter().updateDocument(new Term(ID_FIELD, identifier), doc);
        maybeCommitAfterUpdate();
    }

    public void deleteDocument(String identifier) throws Exception {
        data.getIndexWriter().deleteDocuments(new Term(ID_FIELD, identifier));
        maybeCommitAfterUpdate();
    }

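    // Batches commits: flushes once settings.commitCount updates have
    // accumulated, otherwise arms a one-shot timer so the batch is committed
    // after settings.commitTimeout seconds at the latest.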
    public void maybeCommitAfterUpdate() throws Exception {
        commitCount++;
        LuceneSettings settings = data.getSettings();
        if (commitCount >= settings.commitCount) {
            commit();
            return;
        }
        if (commitTimer == null) {
            TimerTask timerTask = new TimerTask() {
                public void run() {
                    try {
                        commit();
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            };
            commitTimer = new Timer();
            commitTimer.schedule(timerTask, settings.commitTimeout * 1000);
        }
    }

    public synchronized void commit() throws Exception {
        commitCount = 0;
        if (commitTimer != null) {
            commitTimer.cancel();
            commitTimer.purge();
            commitTimer = null;
        }
        data.commit();
    }

    public LuceneResponse executeQuery(QueryData q) throws Throwable {
        return executeQuery(q, null, null, null, null, null);
    }

    public LuceneResponse executeQuery(Query query) throws Throwable {
        QueryData q = new QueryData();
        q.query = query;
        return executeQuery(q, null, null, null, null, null);
    }

    public LuceneResponse executeQuery(Query query, int start, int stop) throws Throwable {
        QueryData q = new QueryData();
        q.query = query;
        q.start = start;
        q.stop = stop;
        return executeQuery(q, null, null, null, null, null);
    }

    public LuceneResponse executeQuery(Query query, List<FacetRequest> facets) throws Throwable {
        QueryData q = new QueryData();
        q.query = query;
        q.facets = facets;
        return executeQuery(q, null, null, null, null, null);
    }

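    // Grouping and dedup can collapse several collected documents into a
    // single hit, leaving the requested page short. The loop below retries
    // with a 10x larger collector window each round and gives up once the
    // window exceeds 10,000 documents.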
    public LuceneResponse executeQuery(QueryData q, List<Query> filterQueries, List<String[]> drilldownQueries,
            List<Filter> filters, List<AggregateScoreSuperCollector> scoreCollectors,
            Collection<KeySuperCollector> keyCollectors) throws Throwable {
        int totalHits;
        List<LuceneResponse.Hit> hits;
        Collectors collectors = null;
        Map<String, Long> times = new HashMap<>();
        long t0 = System.currentTimeMillis();
        int topCollectorStop = q.stop;
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            while (true) {
                ClusterConfig clusterConfig = null;
                if (q.clustering) {
                    clusterConfig = q.clusterConfig;
                    if (clusterConfig == null) {
                        clusterConfig = data.getSettings().clusterConfig;
                    }
                }
                collectors = createCollectors(q,
                        topCollectorStop + (q.clustering ? clusterConfig.clusterMoreRecords : 0), keyCollectors,
                        scoreCollectors, reference);
                Filter f = filtersFor(filterQueries, filters == null ? null : filters.toArray(new Filter[0]));

                Query query = q.query;
                if (drilldownQueries != null)
                    query = createDrilldownQuery(query, drilldownQueries);
                long t1 = System.currentTimeMillis();
                ((SuperIndexSearcher) reference.searcher).search(query, f, collectors.root);
                times.put("searchTime", System.currentTimeMillis() - t1);

                totalHits = collectors.topCollector.getTotalHits();
                if (q.clustering) {
                    t1 = System.currentTimeMillis();
                    hits = clusterTopDocsResponse(q, collectors, times, reference.searcher.getIndexReader(),
                            clusterConfig);
                    times.put("totalClusterTime", System.currentTimeMillis() - t1);
                } else {
                    t1 = System.currentTimeMillis();
                    hits = topDocsResponse(q, collectors);
                    times.put("topDocsTime", System.currentTimeMillis() - t1);
                }

                if (hits.size() == q.stop - q.start || topCollectorStop >= totalHits)
                    break;
                topCollectorStop *= 10;
                if (topCollectorStop > 10000) {
                    break;
                }
            }

            LuceneResponse response = new LuceneResponse(totalHits);
            if (collectors.dedupCollector != null)
                response.totalWithDuplicates = collectors.dedupCollector.getTotalHits();

            response.hits = hits;

            if (collectors.facetCollector != null) {
                long t1 = System.currentTimeMillis();
                response.drilldownData = facetResult(collectors.facetCollector, q.facets);
                times.put("facetTime", System.currentTimeMillis() - t1);
            }

            if (q.suggestionRequest != null) {
                long t1 = System.currentTimeMillis();
                HashMap<String, SuggestWord[]> result = new HashMap<>();
                for (String suggest : q.suggestionRequest.suggests)
                    result.put(suggest, suggest(suggest, q.suggestionRequest.count, q.suggestionRequest.field));
                times.put("suggestionTime", System.currentTimeMillis() - t1);
                response.suggestions = result;
            }
            response.times = times;
            response.queryTime = System.currentTimeMillis() - t0;
            return response;
        } finally {
            data.getManager().release(reference);
        }
    }

    private List<Hit> clusterTopDocsResponse(QueryData q, Collectors collectors, Map<String, Long> times,
            IndexReader indexReader, ClusterConfig clusterConfig) throws Exception {
        int totalHits = collectors.topCollector.getTotalHits();
        TopDocs topDocs = collectors.topCollector.topDocs(q.start);

        MerescoClusterer clusterer = new MerescoClusterer(indexReader, clusterConfig,
                this.getSettings().interpolateEpsilon, totalHits, q.stop - q.start);
        long t0 = System.currentTimeMillis();
        clusterer.processTopDocs(topDocs);
        times.put("processTopDocsForClustering", System.currentTimeMillis() - t0);
        t0 = System.currentTimeMillis();
        clusterer.finish();
        times.put("clusteringAlgorithm", System.currentTimeMillis() - t0);

        List<LuceneResponse.Hit> hits = new ArrayList<>();
        int count = q.start;
        HashSet<Integer> seenDocIds = new HashSet<>();
        t0 = System.currentTimeMillis();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            if (count >= q.stop)
                break;
            if (seenDocIds.contains(scoreDoc.doc))
                continue;

            Integer representative = null;
            MerescoCluster cluster = clusterer.cluster(scoreDoc.doc);
            if (cluster == null) {
                representative = scoreDoc.doc;
                seenDocIds.add(representative);
            } else {
                for (DocScore ds : cluster.topDocs) {
                    if (representative == null) {
                        representative = ds.docId;
                    }
                    seenDocIds.add(ds.docId);
                }
            }

            ClusterHit hit = new ClusterHit(getDocument(representative).get(ID_FIELD), scoreDoc.score);
            if (cluster != null) {
                hit.topTerms = cluster.topTerms;
                hit.topDocs = cluster.topDocs;
                for (DocScore docScore : cluster.topDocs) {
                    docScore.identifier = getDocument(docScore.docId).get(ID_FIELD);
                }
            }
            hits.add(hit);
            count += 1;
        }
        times.put("collectClusters", System.currentTimeMillis() - t0);
        return hits;
    }

    private List<Hit> topDocsResponse(QueryData q, Collectors collectors) throws Exception {
        int totalHits = collectors.topCollector.getTotalHits();

        DeDupFilterSuperCollector dedupCollector = collectors.dedupCollector;
        GroupSuperCollector groupingCollector = collectors.groupingCollector;

        HashSet<String> seenIds = new HashSet<>();
        int count = q.start;
        List<LuceneResponse.Hit> hits = new ArrayList<>();
        for (ScoreDoc scoreDoc : collectors.topCollector.topDocs(q.stop == 0 ? 1 : q.start).scoreDocs) { //TODO: temp fix for start/stop = 0
            if (count >= q.stop)
                break;
            if (dedupCollector != null) {
                DeDupFilterSuperCollector.Key keyForDocId = dedupCollector.keyForDocId(scoreDoc.doc);
                int newDocId = keyForDocId == null ? scoreDoc.doc : keyForDocId.getDocId();
                DedupHit hit = new DedupHit(getDocument(newDocId).get(ID_FIELD), scoreDoc.score);
                hit.duplicateField = dedupCollector.getKeyName();
                hit.duplicateCount = 1;
                if (keyForDocId != null)
                    hit.duplicateCount = keyForDocId.getCount();
                hit.score = scoreDoc.score;
                hits.add(hit);
            } else if (groupingCollector != null) {
                GroupingHit hit = new GroupingHit(getDocument(scoreDoc.doc).get(ID_FIELD), scoreDoc.score);
                if (seenIds.contains(hit.id))
                    continue;

                List<String> duplicateIds = new ArrayList<>();
                duplicateIds.add(hit.id);
                if (totalHits > (q.stop - q.start)) {
                    List<Integer> groupedDocIds = groupingCollector.group(scoreDoc.doc);
                    if (groupedDocIds != null)
                        for (int docId : groupedDocIds) {
                            String id = getDocument(docId).get(ID_FIELD);
                            if (!id.equals(hit.id))
                                duplicateIds.add(id);
                        }
                }
                seenIds.addAll(duplicateIds);
                hit.groupingField = groupingCollector.getKeyName();
                hit.duplicates = duplicateIds;
                hit.score = scoreDoc.score;
                hits.add(hit);
            } else {
                Hit hit = new Hit(getDocument(scoreDoc.doc).get(ID_FIELD), scoreDoc.score);
                hits.add(hit);
            }
            count++;
        }
        return hits;
    }

    public List<DrilldownData> facets(List<FacetRequest> facets, List<Query> filterQueries,
            List<String[]> drilldownQueries, Filter filter) throws Throwable {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            FacetSuperCollector facetCollector = facetCollector(facets, reference.taxonomyReader);
            if (facetCollector == null)
                return new ArrayList<DrilldownData>();
            Filter filter_ = filtersFor(filterQueries, filter);
            Query query = new MatchAllDocsQuery();
            if (drilldownQueries != null)
                query = createDrilldownQuery(query, drilldownQueries);
            ((SuperIndexSearcher) reference.searcher).search(query, filter_, facetCollector);
            return facetResult(facetCollector, facets);
        } finally {
            data.getManager().release(reference);
        }
    }

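    // Query-based filters are wrapped in a CachingWrapperFilter and memoized
    // in an LRU cache of 50 entries (see LuceneData.initSettings), so a
    // repeated filter query reuses its cached bitset on later searches.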
    public Filter filterQuery(Query query) throws Exception {
        Filter f = data.getFilterCache().get(query);
        if (f != null) {
            return f;
        }
        CachingWrapperFilter filter = new CachingWrapperFilter(new QueryWrapperFilter(query));
        data.getFilterCache().put(query, filter);
        return filter;
    }

    private Filter filtersFor(List<Query> filterQueries, Filter... filter) throws Exception {
        List<Filter> filters = new ArrayList<Filter>();
        if (filterQueries != null)
            for (Query query : filterQueries)
                filters.add(filterQuery(query));
        if (filter != null)
            for (Filter f : filter)
                if (f != null)
                    filters.add(f);
        if (filters.size() == 0)
            return null;
        return new ChainedFilter(filters.toArray(new Filter[0]), ChainedFilter.AND);
    }

    public Document getDocument(int docID) throws Exception {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            return ((SuperIndexSearcher) reference.searcher).doc(docID);
        } finally {
            data.getManager().release(reference);
        }
    }

    private Collectors createCollectors(QueryData q, int stop, Collection<KeySuperCollector> keyCollectors,
            List<AggregateScoreSuperCollector> scoreCollectors, SearcherAndTaxonomy reference) throws Exception {
        Collectors allCollectors = new Collectors();
        SuperCollector<?> resultsCollector;
        if (q.groupingField != null) {
            allCollectors.topCollector = topCollector(q.start, stop * 10, q.sort);
            allCollectors.groupingCollector = new GroupSuperCollector(q.groupingField, allCollectors.topCollector);
            resultsCollector = allCollectors.groupingCollector;
        } else if (q.dedupField != null) {
            allCollectors.topCollector = topCollector(q.start, stop, q.sort);
            allCollectors.dedupCollector = new DeDupFilterSuperCollector(q.dedupField, q.dedupSortField,
                    allCollectors.topCollector);
            resultsCollector = allCollectors.dedupCollector;
        } else {
            allCollectors.topCollector = topCollector(q.start, stop, q.sort);
            resultsCollector = allCollectors.topCollector;
        }
        allCollectors.facetCollector = facetCollector(q.facets, reference.taxonomyReader);

        List<SuperCollector<?>> collectors = new ArrayList<SuperCollector<?>>();
        collectors.add(resultsCollector);
        if (allCollectors.facetCollector != null) {
            collectors.add(allCollectors.facetCollector);
        }
        if (keyCollectors != null)
            collectors.addAll(keyCollectors);
        allCollectors.root = new MultiSuperCollector(collectors);

        if (scoreCollectors != null && scoreCollectors.size() > 0) {
            for (AggregateScoreSuperCollector scoreCollector : scoreCollectors) {
                scoreCollector.setDelegate(allCollectors.root);
                allCollectors.root = scoreCollector;
            }
        }
        return allCollectors;
    }

    private TopDocSuperCollector topCollector(int start, int stop, Sort sort) {
        if (stop <= start)
            // TODO: temp fix for start/stop = 0; this should eventually
            // return a TotalHitCountSuperCollector instead.
            return new TopScoreDocSuperCollector(stop == 0 ? 1 : stop, true);
        if (sort == null)
            return new TopScoreDocSuperCollector(stop, true);
        return new TopFieldSuperCollector(sort, stop, true, false, true);
    }

    private FacetSuperCollector facetCollector(List<FacetRequest> facets, TaxonomyReader taxonomyReader)
            throws Exception {
        if (facets == null || facets.size() == 0)
            return null;
        String[] indexFieldnames = getIndexFieldNames(facets);
        FacetSuperCollector collector = new FacetSuperCollector(taxonomyReader, data.getFacetsConfig(),
                getOrdinalsReader(indexFieldnames[0]));
        for (int i = 1; i < indexFieldnames.length; i++) {
            collector.addOrdinalsReader(getOrdinalsReader(indexFieldnames[i]));
        }
        return collector;
    }

    String[] getIndexFieldNames(List<FacetRequest> facets) throws Exception {
        Set<String> indexFieldnames = new HashSet<String>();
        for (FacetRequest f : facets)
            indexFieldnames.add(this.data.getFacetsConfig().getDimConfig(f.fieldname).indexFieldName);
        return indexFieldnames.toArray(new String[0]);
    }

    private OrdinalsReader getOrdinalsReader(String indexFieldname) {
        CachedOrdinalsReader reader = cachedOrdinalsReader.get(indexFieldname);
        if (reader == null) {
            DocValuesOrdinalsReader docValuesReader = indexFieldname == null ? new DocValuesOrdinalsReader()
                    : new DocValuesOrdinalsReader(indexFieldname);
            reader = new CachedOrdinalsReader(docValuesReader);
            cachedOrdinalsReader.put(indexFieldname, reader);
        }
        return reader;
    }

    private List<DrilldownData> facetResult(FacetSuperCollector facetCollector, List<FacetRequest> facets)
            throws Exception {
        List<DrilldownData> drilldownData = new ArrayList<DrilldownData>();
        for (FacetRequest facet : facets) {
            DrilldownData dd = new DrilldownData(facet.fieldname);
            dd.path = facet.path;
            List<DrilldownData.Term> terms = drilldownDataFromFacetResult(facetCollector, facet, facet.path,
                    this.data.getFacetsConfig().getDimConfig(facet.fieldname).hierarchical);
            if (terms != null) {
                dd.terms = terms;
                drilldownData.add(dd);
            }
        }
        return drilldownData;
    }

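    // Descends one facet path level per call; for hierarchical dimensions it
    // recurses into each label to fill subTerms. A maxTerms of 0 is treated
    // as "unlimited" by passing Integer.MAX_VALUE to getTopChildren().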
    public List<DrilldownData.Term> drilldownDataFromFacetResult(FacetSuperCollector facetCollector,
            FacetRequest facet, String[] path, boolean hierarchical) throws IOException {
        FacetResult result = facetCollector.getTopChildren(facet.maxTerms == 0 ? Integer.MAX_VALUE : facet.maxTerms,
                facet.fieldname, path);
        if (result == null)
            return null;
        List<DrilldownData.Term> terms = new ArrayList<DrilldownData.Term>();
        for (LabelAndValue l : result.labelValues) {
            DrilldownData.Term term = new DrilldownData.Term(l.label, l.value.intValue());
            if (hierarchical) {
                String[] newPath = new String[path.length + 1];
                System.arraycopy(path, 0, newPath, 0, path.length);
                newPath[newPath.length - 1] = l.label;
                term.subTerms = drilldownDataFromFacetResult(facetCollector, facet, newPath, hierarchical);
            }
            terms.add(term);
        }
        return terms;
    }

    public List<TermCount> termsForField(String field, String prefix, int limit) throws Exception {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            List<TermCount> terms = new ArrayList<TermCount>();
            IndexReader reader = reference.searcher.getIndexReader();
            Terms termsEnum = MultiFields.getTerms(reader, field);
            if (termsEnum == null)
                return terms;
            TermsEnum iterator = termsEnum.iterator(null);
            if (prefix != null) {
                // Guard: seekCeil() may exhaust the enum or land outside the prefix.
                if (iterator.seekCeil(new BytesRef(prefix)) == TermsEnum.SeekStatus.END)
                    return terms;
                String first = iterator.term().utf8ToString();
                if (!first.startsWith(prefix))
                    return terms;
                terms.add(new TermCount(first, iterator.docFreq()));
            }
            while (terms.size() < limit) {
                BytesRef next = iterator.next();
                if (next == null)
                    break;
                String term = next.utf8ToString();
                if (prefix != null && !term.startsWith(prefix)) {
                    break;
                }
                terms.add(new TermCount(term, iterator.docFreq()));
            }
            return terms;
        } finally {
            data.getManager().release(reference);
        }
    }

    public int numDocs() throws Exception {
        return this.data.getIndexWriter().numDocs();
    }

    public int maxDoc() throws Exception {
        return this.data.getIndexWriter().maxDoc();
    }

    public List<String> fieldnames() throws Exception {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            List<String> fieldnames = new ArrayList<String>();
            Fields fields = MultiFields.getFields(reference.searcher.getIndexReader());
            if (fields == null)
                return fieldnames;
            for (Iterator<String> iterator = fields.iterator(); iterator.hasNext();) {
                fieldnames.add(iterator.next());
            }
            return fieldnames;
        } finally {
            data.getManager().release(reference);
        }
    }

    public List<String> drilldownFieldnames(int limit, String dim, String... path) throws Exception {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            DirectoryTaxonomyReader taxoReader = reference.taxonomyReader;
            int parentOrdinal = dim == null ? TaxonomyReader.ROOT_ORDINAL : taxoReader.getOrdinal(dim, path);
            ChildrenIterator childrenIter = taxoReader.getChildren(parentOrdinal);
            List<String> fieldnames = new ArrayList<String>();
            while (true) {
                int ordinal = childrenIter.next();
                if (ordinal == TaxonomyReader.INVALID_ORDINAL)
                    break;
                String[] components = taxoReader.getPath(ordinal).components;
                fieldnames.add(components[components.length - 1]);
                if (fieldnames.size() >= limit)
                    break;
            }
            return fieldnames;
        } finally {
            data.getManager().release(reference);
        }
    }

    public void search(Query query, Query filterQuery, SuperCollector<?> collector) throws Throwable {
        Filter filter_ = null;
        if (filterQuery != null)
            filter_ = new QueryWrapperFilter(filterQuery);
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            ((SuperIndexSearcher) reference.searcher).search(query, filter_, collector);
        } finally {
            data.getManager().release(reference);
        }
    }

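    // Collected key sets are cached per (keyName, filterQuery) pair. The
    // cache is cleared whenever a commit actually refreshes the searcher
    // (see LuceneData.commit), so stale bitsets never outlive their reader.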
    public OpenBitSet collectKeys(Query filterQuery, String keyName, Query query) throws Throwable {
        return collectKeys(filterQuery, keyName, query, true);
    }

    public OpenBitSet collectKeys(Query filterQuery, String keyName, Query query, boolean cacheCollectedKeys)
            throws Throwable {
        if (cacheCollectedKeys) {
            KeyNameQuery keyNameQuery = new KeyNameQuery(keyName, filterQuery);
            OpenBitSet keys = data.getKeyCollectorCache().get(keyNameQuery);
            if (keys == null) {
                keys = doCollectKeys(filterQuery, keyName, query);
                data.getKeyCollectorCache().put(keyNameQuery, keys);
            }
            return keys;
        }
        return doCollectKeys(filterQuery, keyName, query);
    }

    private OpenBitSet doCollectKeys(Query filterQuery, String keyName, Query query) throws Throwable {
        KeySuperCollector keyCollector = new KeySuperCollector(keyName);
        if (query == null)
            query = new MatchAllDocsQuery();
        search(query, filterQuery, keyCollector);
        return keyCollector.getCollectedKeys();
    }

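    // drilldownQueries alternates entries: element i carries the dimension
    // name (only index [0] is read) and element i + 1 the path within that
    // dimension, hence the i += 2 stride below.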
    public Query createDrilldownQuery(Query luceneQuery, List<String[]> drilldownQueries) throws Exception {
        BooleanQuery q = new BooleanQuery(true);
        if (luceneQuery != null)
            q.add(luceneQuery, Occur.MUST);
        for (int i = 0; i < drilldownQueries.size(); i += 2) {
            String field = drilldownQueries.get(i)[0];
            String indexFieldName = data.getFacetsConfig().getDimConfig(field).indexFieldName;
            q.add(new TermQuery(DrillDownQuery.term(indexFieldName, field, drilldownQueries.get(i + 1))),
                    Occur.MUST);
        }
        return q;
    }

    public QueryConverter getQueryConverter() throws Exception {
        return new QueryConverter(this.data.getFacetsConfig());
    }

    public ScoreSuperCollector scoreCollector(String keyName, Query query) throws Throwable {
        KeyNameQuery keyNameQuery = new KeyNameQuery(keyName, query);
        ScoreSuperCollector scoreCollector = data.getScoreCollectorCache().get(keyNameQuery);
        if (scoreCollector == null) {
            scoreCollector = doScoreCollecting(keyName, query);
            data.getScoreCollectorCache().put(keyNameQuery, scoreCollector);
        }
        return scoreCollector;
    }

    public ScoreSuperCollector doScoreCollecting(String keyName, Query query) throws Throwable {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            ScoreSuperCollector scoreCollector = new ScoreSuperCollector(keyName);
            ((SuperIndexSearcher) reference.searcher).search(query, null, scoreCollector);
            return scoreCollector;
        } finally {
            data.getManager().release(reference);
        }
    }

    public SuggestWord[] suggest(String term, int count, String field) throws Exception {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            return spellChecker.suggestSimilar(new Term(field, term), count, reference.searcher.getIndexReader());
        } finally {
            data.getManager().release(reference);
        }
    }

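    // More-like-this style search: every term from the source document's term
    // vectors is added to a CommonTermsQuery (terms occurring in over 10% of
    // documents are split into a separate optional clause), while the source
    // document itself is excluded with a MUST_NOT clause. Requires documents
    // to have been indexed with term vectors.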
    public LuceneResponse similarDocuments(String identifier) throws Throwable {
        SearcherAndTaxonomy reference = data.getManager().acquire();
        try {
            Query idQuery = new TermQuery(new Term(ID_FIELD, identifier));
            TopDocs topDocs = reference.searcher.search(idQuery, 1);
            if (topDocs.totalHits == 0)
                return new LuceneResponse(0);
            int docId = topDocs.scoreDocs[0].doc;
            IndexReader reader = reference.searcher.getIndexReader();
            CommonTermsQuery commonQuery = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, 0.1f);
            Fields termVectors = reader.getTermVectors(docId);
            if (termVectors == null)
                return new LuceneResponse(0);
            for (String field : termVectors) {
                TermsEnum iterator = termVectors.terms(field).iterator(null);
                BytesRef b;
                while ((b = iterator.next()) != null) {
                    Term term = new Term(field, b.utf8ToString());
                    commonQuery.add(term);
                }
            }
            BooleanQuery query = new BooleanQuery();
            query.add(idQuery, Occur.MUST_NOT);
            query.add(commonQuery, Occur.MUST);
            return executeQuery(query);
        } finally {
            data.getManager().release(reference);
        }
    }

    public static class Collectors {
        public GroupSuperCollector groupingCollector;
        public DeDupFilterSuperCollector dedupCollector;
        public TopDocSuperCollector topCollector;
        public FacetSuperCollector facetCollector;
        public SuperCollector<?> root;
    }

    public class TermCount {
        public String term;
        public int count;

        public TermCount(String term, int count) {
            this.term = term;
            this.count = count;
        }
    }

    private static class KeyNameQuery {
        private String keyName;
        private Query query;

        public KeyNameQuery(String keyName, Query query) {
            this.keyName = keyName;
            this.query = query;
        }

        @Override
        public int hashCode() {
            return keyName.hashCode() + 127 * query.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (obj instanceof KeyNameQuery) {
                KeyNameQuery other = (KeyNameQuery) obj;
                return other.keyName.equals(keyName) && other.query.equals(query);
            }
            return false;
        }
    }

    static class LuceneData {
        private IndexWriter indexWriter;
        private DirectoryTaxonomyWriter taxoWriter;
        private LuceneSettings settings;
        private Map<Query, Filter> filterCache;
        private Map<KeyNameQuery, ScoreSuperCollector> scoreCollectorCache;
        private Map<KeyNameQuery, OpenBitSet> keyCollectorCache;
        private SearcherTaxonomyManager manager;
        private LuceneRefreshListener refreshListener = new LuceneRefreshListener();

        public void commit() throws Exception {
            this.indexWriter.commit();
            this.taxoWriter.commit();
            this.manager.maybeRefreshBlocking();
            if (this.refreshListener.isRefreshed()) {
                this.scoreCollectorCache.clear();
                this.keyCollectorCache.clear();
            }
        }

        public void close() throws IOException {
            if (this.settings == null)
                return;
            if (this.manager != null)
                this.manager.close();
            if (this.taxoWriter != null)
                this.taxoWriter.close();
            if (this.indexWriter != null)
                this.indexWriter.close();
        }

        public void initSettings(File stateDir, LuceneSettings settings) throws Exception {
            if (this.settings != null)
                throw new Exception("Settings may only be initialized once");
            this.settings = settings;

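            // Unmapping is disabled on both directories, presumably to avoid
            // the known MMapDirectory hazard where a buffer is unmapped while
            // an acquired reader is still using it, which can crash the JVM.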
            MMapDirectory indexDirectory = new MMapDirectory(new File(stateDir, "index"));
            indexDirectory.setUseUnmap(false);

            MMapDirectory taxoDirectory = new MMapDirectory(new File(stateDir, "taxo"));
            taxoDirectory.setUseUnmap(false);

            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_4, settings.analyzer);
            config.setSimilarity(settings.similarity);
            TieredMergePolicy mergePolicy = new TieredMergePolicy();
            mergePolicy.setMaxMergeAtOnce(settings.maxMergeAtOnce);
            mergePolicy.setSegmentsPerTier(settings.segmentsPerTier);
            config.setMergePolicy(mergePolicy);

            this.indexWriter = new IndexWriter(indexDirectory, config);
            this.indexWriter.commit();
            this.taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory,
                    IndexWriterConfig.OpenMode.CREATE_OR_APPEND,
                    new LruTaxonomyWriterCache(settings.lruTaxonomyWriterCacheSize));
            this.taxoWriter.commit();

            this.filterCache = Collections.synchronizedMap(new LRUMap<Query, Filter>(50));
            this.scoreCollectorCache = Collections
                    .synchronizedMap(new LRUMap<KeyNameQuery, ScoreSuperCollector>(50));
            this.keyCollectorCache = Collections.synchronizedMap(new LRUMap<KeyNameQuery, OpenBitSet>(50));

            this.manager = new SearcherTaxonomyManager(indexDirectory, taxoDirectory,
                    new MerescoSearchFactory(indexDirectory, taxoDirectory, settings));
            this.manager.addListener(refreshListener);
        }

        public IndexWriter getIndexWriter() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return indexWriter;
        }

        public DirectoryTaxonomyWriter getTaxoWriter() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return taxoWriter;
        }

        public FacetsConfig getFacetsConfig() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return this.settings.facetsConfig;
        }

        public LuceneSettings getSettings() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return settings;
        }

        public boolean hasSettings() {
            return this.settings != null;
        }

        public Map<Query, Filter> getFilterCache() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return filterCache;
        }

        public Map<KeyNameQuery, ScoreSuperCollector> getScoreCollectorCache() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return scoreCollectorCache;
        }

        public Map<KeyNameQuery, OpenBitSet> getKeyCollectorCache() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return keyCollectorCache;
        }

        public SearcherTaxonomyManager getManager() throws UninitializedException {
            if (this.settings == null)
                throw new UninitializedException();
            return manager;
        }

        private class LuceneRefreshListener implements RefreshListener {
            private boolean refreshed;

            @Override
            public void afterRefresh(boolean didRefresh) throws IOException {
                if (!refreshed)
                    refreshed = didRefresh;
            }

            @Override
            public void beforeRefresh() throws IOException {
            }

            public boolean isRefreshed() {
                if (refreshed) {
                    refreshed = false;
                    return true;
                }
                return false;
            }
        }
    }
}