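/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *******************************************************************************/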
package org.eclipse.rdf4j.sail.lucene;

import java.lang.reflect.UndeclaredThrowableException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.util.Bits;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.URI;
import org.eclipse.rdf4j.model.vocabulary.GEOF;
import org.eclipse.rdf4j.query.MalformedQueryException;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.lucene.util.GeoUnits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;

/**
 * A LuceneIndex is a one-stop-shop abstraction of a Lucene index. It takes care of proper synchronization of
 * IndexReaders, IndexWriters and IndexSearchers in a way that is suitable for a LuceneSail.
 *
 * @see LuceneSail
 */
public class LuceneIndex extends AbstractLuceneIndex {

    static {
        // do NOT set this to Integer.MAX_VALUE, because this breaks fuzzy
        // queries
        BooleanQuery.setMaxClauseCount(1024 * 1024);

    // Prefix put in front of a field name to form the name handed to that field's spatial strategy
    // (see createSpatialStrategyMapper).
    private static final String GEO_FIELD_PREFIX = "_geo_";

    // Per-instance logger, named after the runtime class of this object.
    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * The Directory that holds the Lucene index files.
     */
    private volatile Directory directory; // returned as-is by the unsynchronized getDirectory()

    /**
     * The Analyzer used to tokenize strings and queries.
     */
    private volatile Analyzer analyzer; // handed to every IndexWriterConfig this class creates

    // Second analyzer; postInit() sets it to a StandardAnalyzer.
    private volatile Analyzer queryAnalyzer;

    /**
     * The IndexWriter that can be used to alter the index' contents. Created lazily.
     */
    private volatile IndexWriter indexWriter; // null until getIndexWriter() creates it; shutDown() resets it to null

    /**
     * This holds IndexReader and IndexSearcher.
     */
    protected volatile ReaderMonitor currentMonitor; // created on demand by getCurrentMonitor()

    // Maps a field name to the SpatialStrategy used for that field.
    private volatile Function<? super String, ? extends SpatialStrategy> geoStrategyMapper;

    // Flipped to true by the first shutDown() call; accessors that check it then throw SailException.
    private final AtomicBoolean closed = new AtomicBoolean(false);

    /**
     * Creates an index object without assigning a directory, analyzer or spatial strategy mapper.
     * NOTE(review): presumably {@link #initialize(Properties)} must be called before use - confirm.
     */
    public LuceneIndex() {

    /**
     * Creates a new LuceneIndex.
     *
     * @param directory
     *        The Directory in which an index can be found and/or in which index files are written.
     * @param analyzer
     *        The Analyzer that will be used for tokenizing strings to index and queries.
     * @throws IOException
     *         When the Directory could not be unlocked.
     */
    public LuceneIndex(Directory directory, Analyzer analyzer) throws IOException { = directory;
        // NOTE(review): the assignment target of the statement above is missing in this view;
        // presumably the directory argument is stored in a field - confirm against the full file.
        this.analyzer = analyzer;
        // No spatial configuration is passed to this constructor, so the mapper is built from an
        // empty parameter map.
        this.geoStrategyMapper = createSpatialStrategyMapper(Collections.<String, String>emptyMap());


    /**
     * Configures this index from the given parameters: delegates to the superclass, then builds the
     * analyzer and the spatial strategy mapper from the same parameters.
     *
     * @param parameters
     *        configuration properties, also handed to createDirectory, createAnalyzer and
     *        createSpatialStrategyMapper
     * @throws Exception
     *         propagated from the superclass or from any of the factory methods
     */
    public synchronized void initialize(Properties parameters) throws Exception {
        // NOTE(review): the assignment target before "= createDirectory(parameters)" is missing in
        // this view; presumably the result is stored in the directory field - confirm.
        super.initialize(parameters); = createDirectory(parameters);
        this.analyzer = createAnalyzer(parameters);
        // slightly hacky cast to cope with the fact that Properties is
        // Map<Object,Object>
        // even though it is effectively Map<String,String>
        this.geoStrategyMapper = createSpatialStrategyMapper((Map<String, String>) (Map<?, ?>) parameters);


    /**
     * Selects the Lucene Directory that backs this index, based on the given parameters.
     *
     * @param parameters
     *        configuration; LuceneSail.LUCENE_DIR_KEY is checked first, then
     *        LuceneSail.LUCENE_RAMDIR_KEY
     * @return the selected Directory
     * @throws IOException
     *         if neither parameter selects a directory
     */
    protected Directory createDirectory(Properties parameters) throws IOException {
        Directory dir;
        if (parameters.containsKey(LuceneSail.LUCENE_DIR_KEY)) {
            // NOTE(review): the right-hand side of this assignment is missing in this view;
            // presumably it opens a directory at the configured location - confirm.
            dir =;
        } else if (parameters.containsKey(LuceneSail.LUCENE_RAMDIR_KEY)
                && "true".equals(parameters.getProperty(LuceneSail.LUCENE_RAMDIR_KEY))) {
            // RAM directory only when the RAM-dir parameter is exactly "true"
            dir = new RAMDirectory();
        } else {
            throw new IOException("No luceneIndex set, and no '" + LuceneSail.LUCENE_DIR_KEY + "' or '"
                    + LuceneSail.LUCENE_RAMDIR_KEY + "' parameter given. ");
        return dir;

    /**
     * Chooses the Analyzer for this index.
     *
     * @param parameters
     *        configuration; when LuceneSail.ANALYZER_CLASS_KEY is present its value is used as a class
     *        name, otherwise a StandardAnalyzer is created
     * @return the analyzer
     * @throws Exception
     *         e.g. when the configured class cannot be loaded
     */
    protected Analyzer createAnalyzer(Properties parameters) throws Exception {
        Analyzer analyzer;
        if (parameters.containsKey(LuceneSail.ANALYZER_CLASS_KEY)) {
            // NOTE(review): this statement is cut off after Class.forName(...) in this view;
            // presumably the loaded class is instantiated before the cast - confirm.
            analyzer = (Analyzer) Class.forName(parameters.getProperty(LuceneSail.ANALYZER_CLASS_KEY))
        } else {
            analyzer = new StandardAnalyzer();
        return analyzer;

    /**
     * Sets the query analyzer and, when the directory does not contain an index yet, opens an
     * IndexWriter on it.
     *
     * @throws IOException
     *         if the directory cannot be inspected or the writer cannot be opened
     */
    private void postInit() throws IOException {
        this.queryAnalyzer = new StandardAnalyzer();

        // do some initialization for new indices
        if (!DirectoryReader.indexExists(directory)) {
            logger.debug("creating new Lucene index in directory {}", directory);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            // NOTE(review): nothing visible in this view uses or closes this writer; presumably it
            // is closed right after creation - confirm against the full file.
            IndexWriter writer = new IndexWriter(directory, indexWriterConfig);

    protected Function<String, ? extends SpatialStrategy> createSpatialStrategyMapper(
            Map<String, String> parameters) {
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        SpatialContext geoContext = SpatialContextFactory.makeSpatialContext(parameters, classLoader);
        final SpatialPrefixTree spt = SpatialPrefixTreeFactory.makeSPT(parameters, classLoader, geoContext);
        return new Function<String, SpatialStrategy>() {

            public SpatialStrategy apply(String field) {
                return new RecursivePrefixTreeStrategy(spt, GEO_FIELD_PREFIX + field);


    protected SpatialContext getSpatialContext(String property) {
        return geoStrategyMapper.apply(property).getSpatialContext();

    // //////////////////////////////// Setters and getters

    /**
     * @return the Directory currently held by this index (unsynchronized read of the volatile field)
     */
    public Directory getDirectory() {
        return directory;

    /**
     * @return the Analyzer currently held by this index (unsynchronized read of the volatile field)
     */
    public Analyzer getAnalyzer() {
        return analyzer;

    /**
     * @return the function that maps a field name to its SpatialStrategy
     */
    public Function<? super String, ? extends SpatialStrategy> getSpatialStrategyMapper() {
        return geoStrategyMapper;

    // //////////////////////////////// Methods for controlled index access
    // For quick'n'easy access to the reader, the IndexReader is returned directly;
    // result LuceneQueryIterators use the more elaborate
    // ReaderMonitor directly so that they can close the reader when they
    // are done.

    public synchronized IndexReader getIndexReader() throws IOException {
        if (closed.get()) {
            throw new SailException("Index has been closed");
        return getIndexSearcher().getIndexReader();

    public synchronized IndexSearcher getIndexSearcher() throws IOException {
        if (closed.get()) {
            throw new SailException("Index has been closed");
        return getCurrentMonitor().getIndexSearcher();

    /**
     * The current monitor holds the instances of IndexReader and IndexSearcher. It is used to keep track
     * of readers.
     */
    public synchronized ReaderMonitor getCurrentMonitor() {
        if (closed.get()) {
            throw new SailException("Index has been closed");
        if (currentMonitor == null) {
            currentMonitor = new ReaderMonitor(this, directory);
        return currentMonitor;

    public synchronized IndexWriter getIndexWriter() throws IOException {
        if (closed.get()) {
            throw new SailException("Index has been closed");
        if (indexWriter == null) {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriter = new IndexWriter(directory, indexWriterConfig);
        return indexWriter;

    /**
     * Shuts this index down. Only the first call does any work: the closed flag is flipped atomically
     * and later calls skip the body. The current monitor, the old monitors and the index writer are
     * handled in nested try/finally blocks so that a failure in one step does not skip the next.
     * NOTE(review): several statements (the close calls, the warning log call, the collection of caught
     * exceptions) are missing in this view; the notes below mark the gaps.
     *
     * @throws IOException
     *         declared; an UndeclaredThrowableException wrapping the first collected exception is thrown
     *         when the exceptions list is not empty
     */
    public void shutDown() throws IOException {
        // try-finally setup ensures that closing of an instance is not skipped
        // when an earlier instance resulted in an IOException
        // FIXME: is there a more elegant way to ensure this?
        if (closed.compareAndSet(false, true)) {
            try {
                // This closes the current monitor, which holds the IndexReader and
                // IndexSearcher
                // The monitor closes IndexReader and IndexSearcher
                ReaderMonitor toCloseCurrentMonitor = currentMonitor;
                currentMonitor = null;
                // NOTE(review): body of this if is missing in this view (presumably closes the monitor).
                if (toCloseCurrentMonitor != null) {
            } finally {
                List<Throwable> exceptions = new ArrayList<>();
                try {
                    synchronized (oldmonitors) {
                        // NOTE(review): the call that emits the message below is missing in this view.
                        if (oldmonitors.size() > 0) {
                                    "LuceneSail: On shutdown {} IndexReaders were not closed. This is due to non-closed Query Iterators, which must be closed!",
                        for (AbstractReaderMonitor monitor : oldmonitors) {
                            // NOTE(review): try and catch bodies are missing in this view.
                            try {
                            } catch (Throwable e) {
                } finally {
                    try {
                        // detach the writer from the field before dealing with it
                        IndexWriter toCloseIndexWriter = indexWriter;
                        indexWriter = null;
                        // NOTE(review): body of this if is missing in this view (presumably closes the writer).
                        if (toCloseIndexWriter != null) {
                    } finally {
                        // only the first collected exception is reported
                        if (!exceptions.isEmpty()) {
                            throw new UndeclaredThrowableException(exceptions.get(0));

    // //////////////////////////////// Methods for updating the index

    protected synchronized SearchDocument getDocument(String id) throws IOException {
        Document document = getDocument(idTerm(id));
        return (document != null) ? new LuceneDocument(document, geoStrategyMapper) : null;

    protected synchronized Iterable<? extends SearchDocument> getDocuments(String resourceId) throws IOException {
        List<Document> docs = getDocuments(new Term(SearchFields.URI_FIELD_NAME, resourceId));
        return Iterables.transform(docs, new Function<Document, SearchDocument>() {

            public SearchDocument apply(Document doc) {
                return new LuceneDocument(doc, geoStrategyMapper);

    protected synchronized SearchDocument newDocument(String id, String resourceId, String context) {
        return new LuceneDocument(id, resourceId, context, geoStrategyMapper);

    /**
     * Creates a new LuceneDocument from the fields of an existing one.
     *
     * @param doc
     *        the document to copy; cast to LuceneDocument
     * @return a LuceneDocument around a newly created Lucene Document
     */
    protected synchronized SearchDocument copyDocument(SearchDocument doc) {
        Document document = ((LuceneDocument) doc).getDocument();
        Document newDocument = new Document();

        // add all existing fields (including id, uri, context, and text)
        // NOTE(review): the loop body is missing in this view; presumably each field is added to
        // newDocument - confirm against the full file.
        for (IndexableField oldField : document.getFields()) {
        return new LuceneDocument(newDocument, geoStrategyMapper);

    protected synchronized void addDocument(SearchDocument doc) throws IOException {
        getIndexWriter().addDocument(((LuceneDocument) doc).getDocument());

    protected synchronized void updateDocument(SearchDocument doc) throws IOException {
        getIndexWriter().updateDocument(idTerm(doc.getId()), ((LuceneDocument) doc).getDocument());

    // NOTE(review): the body of this method is missing in this view; presumably it removes the
    // document with doc's id through the index writer - confirm against the full file.
    protected synchronized void deleteDocument(SearchDocument doc) throws IOException {

    protected synchronized BulkUpdater newBulkUpdate() {
        return new SimpleBulkUpdater(this);

    private Term idTerm(String id) {
        return new Term(SearchFields.ID_FIELD_NAME, id);

    /**
     * Returns a Document representing the specified document ID (combination of resource and context), or
     * null when no such Document exists yet.
     */
    private Document getDocument(Term idTerm) throws IOException {
        IndexReader reader = getIndexReader();
        List<LeafReaderContext> leaves = reader.leaves();
        int size = leaves.size();
        for (int i = 0; i < size; i++) {
            LeafReader lreader = leaves.get(i).reader();
            Document document = getDocument(lreader, idTerm);
            if (document != null) {
                return document;
        // no such Document
        return null;

    private static Document getDocument(LeafReader reader, Term term) throws IOException {
        DocsEnum docs = reader.termDocsEnum(term);
        if (docs != null) {
            int docId = docs.nextDoc();
            if (docId != DocsEnum.NO_MORE_DOCS) {
                if (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    throw new IllegalStateException("Multiple Documents for term " + term.text());
                return readDocument(reader, docId, null);
            } else {
                return null;
        } else {
            return null;

    /**
     * Returns a list of Documents representing the specified Resource (empty when no such Document exists
     * yet). Each document represents a set of statements with the specified Resource as a subject, which
     * are stored in a specific context.
     */
    private List<Document> getDocuments(Term uriTerm) throws IOException {
        List<Document> result = new ArrayList<Document>();

        IndexReader reader = getIndexReader();
        List<LeafReaderContext> leaves = reader.leaves();
        int size = leaves.size();
        for (int i = 0; i < size; i++) {
            LeafReader lreader = leaves.get(i).reader();
            addDocuments(lreader, uriTerm, result);

        return result;

    /**
     * Reads every document of one leaf reader that matches the given term.
     *
     * @param reader
     *        the leaf to search
     * @param term
     *        the term to look up
     * @param documents
     *        NOTE(review): presumably the collection that receives the documents; the statement that
     *        would add to it is missing in this view - confirm
     * @throws IOException
     *         if the leaf cannot be read
     */
    private static void addDocuments(LeafReader reader, Term term, Collection<Document> documents)
            throws IOException {
        DocsEnum docs = reader.termDocsEnum(term);
        // a null enum means the leaf has no posting for the term
        if (docs != null) {
            int docId;
            while ((docId = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                // all stored fields are loaded (fieldsToLoad == null)
                Document document = readDocument(reader, docId, null);

    /**
     * Returns a Document representing the specified Resource &amp; Context combination, or null when no
     * such Document exists yet.
     */
    public synchronized Document getDocument(Resource subject, Resource context) throws IOException {
        // fetch the Document representing this Resource
        String resourceId = SearchFields.getResourceID(subject);
        String contextId = SearchFields.getContextID(context);
        Term idTerm = new Term(SearchFields.ID_FIELD_NAME, SearchFields.formIdString(resourceId, contextId));
        return getDocument(idTerm);

    /**
     * Returns a list of Documents representing the specified Resource (empty when no such Document exists
     * yet). Each document represents a set of statements with the specified Resource as a subject, which
     * are stored in a specific context.
     */
    public synchronized List<Document> getDocuments(Resource subject) throws IOException {
        String resourceId = SearchFields.getResourceID(subject);
        Term uriTerm = new Term(SearchFields.URI_FIELD_NAME, resourceId);
        return getDocuments(uriTerm);

    /**
     * Stores and indexes an ID in a Document.
     */
    public static void addIDField(String id, Document document) {
        document.add(new StringField(SearchFields.ID_FIELD_NAME, id, Store.YES));

    /**
     * Add the "context" value to the doc
     *
     * @param context
     *        the context or null, if null-context
     * @param document
     *        the document
     */
    public static void addContextField(String context, Document document) {
        if (context != null) {
            document.add(new StringField(SearchFields.CONTEXT_FIELD_NAME, context, Store.YES));

    /**
     * Stores and indexes the resource ID in a Document.
     */
    public static void addResourceField(String resourceId, Document document) {
        document.add(new StringField(SearchFields.URI_FIELD_NAME, resourceId, Store.YES));

    public static void addPredicateField(String predicate, String text, Document document) {
        // store this predicate
        document.add(new TextField(predicate, text, Store.YES));

    public static void addStoredOnlyPredicateField(String predicate, String text, Document document) {
        // store this predicate
        document.add(new StoredField(predicate, text));

    public static void addTextField(String text, Document document) {
        // and in TEXT_FIELD_NAME
        document.add(new TextField(SearchFields.TEXT_FIELD_NAME, text, Store.YES));

    /**
     * Invalidates readers and frees them if possible (readers that are still in use by a
     * {@link LuceneQueryConnection} will not be closed). Synchronized on oldmonitors because it
     * manipulates them.
     *
     * @throws IOException
     */
    private void invalidateReaders() throws IOException {
        synchronized (oldmonitors) {
            // Move current monitor to old monitors and set null
            // NOTE(review): the statement that performs the move is missing in this view; only the
            // explanatory comment inside the if remains.
            if (currentMonitor != null) {
                // we do NOT close it directly as it may be used by an open
                // result
                // iterator, hence moving it to the
                // list of oldmonitors where it is handled as other older
                // monitors
            currentMonitor = null;

            // close all monitors if possible
            for (Iterator<AbstractReaderMonitor> i = oldmonitors.iterator(); i.hasNext();) {
                // NOTE(review): the initializer of this variable is missing in this view
                // (presumably the iterator's next element), as is the body of the if below.
                AbstractReaderMonitor monitor =;
                if (monitor.closeWhenPossible()) {

            // check if all readers were closed
            if (oldmonitors.isEmpty()) {
                logger.debug("Deleting unused files from Lucene index");

                // clean up unused files (marked as 'deletable' in Luke
                // Filewalker)
                // NOTE(review): the clean-up statement itself is missing in this view.

                // logIndexStats();

    /**
     * Walks over all document ids of the current reader and gathers statistics: the total number of
     * stored fields and the "id" values of the documents. IOExceptions are logged as warnings instead of
     * being propagated.
     * NOTE(review): several statements are missing in this view (see the notes below), so the exact
     * counting and logging cannot be confirmed from here.
     */
    private void logIndexStats() {
        try {
            IndexReader reader = null;
            try {
                reader = getIndexReader();

                Document doc;
                int totalFields = 0;

                Set<String> ids = new HashSet<String>();
                String[] idArray;
                int count = 0;
                // i ranges over index-wide document ids, including deleted ones
                for (int i = 0; i < reader.maxDoc(); i++) {
                    // NOTE(review): the statement guarded by this if is missing in this view
                    // (presumably it skips deleted documents).
                    if (isDeleted(reader, i))
                    doc = readDocument(reader, i, null);
                    totalFields += doc.getFields().size();
                    idArray = doc.getValues("id");
                    // NOTE(review): the body of this loop is missing in this view (presumably it
                    // records each id in ids); no visible statement changes count.
                    for (String id : idArray)


                // NOTE(review): the calls that emit the two messages below are missing in this view.
      "Total documents in the index: " + reader.numDocs()
                        + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                        + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                        + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
      "Distinct ids in the index: " + ids.size());

            } finally {
                // detach the current monitor; NOTE(review): the body of the if is missing in this
                // view (presumably it closes the monitor).
                ReaderMonitor toCloseCurrentMonitor = currentMonitor;
                currentMonitor = null;
                if (toCloseCurrentMonitor != null) {
        } catch (IOException e) {
            logger.warn(e.getMessage(), e);


    /**
     * Marks the start of a series of changes. No statement is visible in this implementation.
     *
     * @throws IOException
     *         declared, but nothing visible here throws it
     */
    public synchronized void begin() throws IOException {
        // nothing to do

    /**
     * Commits any changes done to the LuceneIndex since the last commit. The semantics is synchronous to
     * SailConnection.commit(), i.e. the LuceneIndex should be committed/rolled back whenever the
     * LuceneSailConnection is committed/rolled back.
     */
    public synchronized void commit() throws IOException {
        // NOTE(review): the statements of this method are missing in this view; only the comment
        // below remains. Its "not" possibly should read "now" - confirm against the full file.
        // the old IndexReaders/Searchers are not outdated

    // NOTE(review): the body of this method is missing in this view; presumably it discards
    // uncommitted changes of the index writer - confirm against the full file.
    public synchronized void rollback() throws IOException {

    // //////////////////////////////// Methods for querying the index

    /**
     * Parse the passed query. To be removed, no longer used.
     *
     * @param query
     *        string
     * @param propertyURI
     *        handed to getQueryParser to obtain the parser
     * @return the parsed query
     * @throws MalformedQueryException
     *         when the parsing breaks
     */
    protected SearchQuery parseQuery(String query, URI propertyURI) throws MalformedQueryException {
        Query q;
        try {
            q = getQueryParser(propertyURI).parse(query);
        } catch (ParseException e) {
            throw new MalformedQueryException(e);
        return new LuceneQuery(q, this);

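    /**
     * Parse the passed query and run it against the index.
     *
     * @param subject
     *        when not null, passed to the search together with the parsed query
     * @param query
     *        string
     * @param propertyURI
     *        handed to getQueryParser to obtain the parser
     * @param highlight
     *        whether a Highlighter is created for the results
     * @return the scored hits of the search
     * @throws MalformedQueryException
     *         when the parsing breaks
     */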
    protected Iterable<? extends DocumentScore> query(Resource subject, String query, URI propertyURI,
            boolean highlight) throws MalformedQueryException, IOException {
        Query q;
        try {
            q = getQueryParser(propertyURI).parse(query);
        } catch (ParseException e) {
            // Lucene's parse failure is rethrown as MalformedQueryException with the cause kept
            throw new MalformedQueryException(e);

        // a Highlighter is only built on request; otherwise null is handed to the result objects
        final Highlighter highlighter;
        if (highlight) {
            // NOTE(review): the remaining constructor argument(s) of SimpleHTMLFormatter are missing
            // in this view.
            Formatter formatter = new SimpleHTMLFormatter(SearchFields.HIGHLIGHTER_PRE_TAG,
            highlighter = new Highlighter(formatter, new QueryScorer(q));
        } else {
            highlighter = null;

        // with a subject the subject-aware search overload is used, otherwise the plain one
        TopDocs docs;
        if (subject != null) {
            docs = search(subject, q);
        } else {
            docs = search(q);
        // wrap every hit as a LuceneDocumentScore
        return Iterables.transform(Arrays.asList(docs.scoreDocs), new Function<ScoreDoc, DocumentScore>() {

            public DocumentScore apply(ScoreDoc doc) {
                return new LuceneDocumentScore(doc, highlighter, LuceneIndex.this);

    /**
     * Searches for documents whose geo field intersects a circle around the given point.
     *
     * @param geoProperty
     *        the property whose field is searched
     * @param p
     *        the centre of the circle
     * @param units
     *        the unit of distance; used to convert it to degrees
     * @param distance
     *        the radius of the circle, in units
     * @param distanceVar
     *        not used in the lines visible in this view
     * @param contextVar
     *        when not null, its value is added to the query as a context restriction
     * @return the hits, wrapped as LuceneDocumentDistance objects
     */
    protected Iterable<? extends DocumentDistance> geoQuery(final URI geoProperty, Point p, final URI units,
            double distance, String distanceVar, Var contextVar) throws MalformedQueryException, IOException {
        // the circle radius is expressed in degrees
        double degs = GeoUnits.toDegrees(distance, units);
        final String geoField = SearchFields.getPropertyField(geoProperty);
        SpatialStrategy strategy = getSpatialStrategyMapper().apply(geoField);
        final Shape boundingCircle = strategy.getSpatialContext().makeCircle(p, degs);
        Query q = strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects, boundingCircle));
        if (contextVar != null) {
            // the variable's value may be null here; addContextTerm handles a null context
            q = addContextTerm(q, (Resource) contextVar.getValue());

        // the spatial query is wrapped in a CustomScoreQuery that uses the strategy's
        // reciprocal-distance value source for the circle
        TopDocs docs = search(
                new CustomScoreQuery(q, new FunctionQuery(strategy.makeRecipDistanceValueSource(boundingCircle))));
        // true when a context variable was given but has no value
        final boolean requireContext = (contextVar != null && !contextVar.hasValue());
        return Iterables.transform(Arrays.asList(docs.scoreDocs), new Function<ScoreDoc, DocumentDistance>() {

            public DocumentDistance apply(ScoreDoc doc) {
                // NOTE(review): the remaining constructor argument(s) are missing in this view.
                return new LuceneDocumentDistance(doc, geoField, units, boundingCircle.getCenter(), requireContext,

    private Query addContextTerm(Query q, Resource ctx) {
        BooleanQuery combinedQuery = new BooleanQuery();
        TermQuery idQuery = new TermQuery(
                new Term(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(ctx)));
        // the specified named graph or not the unnamed graph
        combinedQuery.add(idQuery, ctx != null ? Occur.MUST : Occur.MUST_NOT);
        combinedQuery.add(q, Occur.MUST);
        return combinedQuery;

    /**
     * Searches for documents whose geo field stands in the given spatial relation to a shape.
     *
     * @param relation
     *        the relation, translated by toSpatialOp
     * @param geoProperty
     *        the property whose field is searched
     * @param shape
     *        the shape to compare against
     * @param contextVar
     *        when not null, its value is added to the query as a context restriction
     * @return the hits wrapped as LuceneDocumentResult objects, or null when the relation is not supported
     */
    protected Iterable<? extends DocumentResult> geoRelationQuery(String relation, URI geoProperty, Shape shape,
            Var contextVar) throws MalformedQueryException, IOException {
        SpatialOperation op = toSpatialOp(relation);
        // unsupported relation: signalled to the caller with null
        if (op == null) {
            return null;

        final String geoField = SearchFields.getPropertyField(geoProperty);
        SpatialStrategy strategy = getSpatialStrategyMapper().apply(geoField);
        Query q = strategy.makeQuery(new SpatialArgs(op, shape));
        if (contextVar != null) {
            q = addContextTerm(q, (Resource) contextVar.getValue());

        TopDocs docs = search(q);
        // fields handed to every LuceneDocumentResult: the resource URI and the geo field
        final Set<String> fields = Sets.newHashSet(SearchFields.URI_FIELD_NAME, geoField);
        // NOTE(review): the body of this if is missing in this view; presumably it adds the context
        // field to the set when the context variable is unbound - confirm.
        if (contextVar != null && !contextVar.hasValue()) {
        return Iterables.transform(Arrays.asList(docs.scoreDocs), new Function<ScoreDoc, DocumentResult>() {

            public DocumentResult apply(ScoreDoc doc) {
                return new LuceneDocumentResult(doc, LuceneIndex.this, fields);

    private SpatialOperation toSpatialOp(String relation) {
        if (GEOF.SF_INTERSECTS.stringValue().equals(relation)) {
            return SpatialOperation.Intersects;
        } else if (GEOF.SF_DISJOINT.stringValue().equals(relation)) {
            return SpatialOperation.IsDisjointTo;
        } else if (GEOF.SF_EQUALS.stringValue().equals(relation)) {
            return SpatialOperation.IsEqualTo;
        } else if (GEOF.SF_OVERLAPS.stringValue().equals(relation)) {
            return SpatialOperation.Overlaps;
        } else if (GEOF.EH_COVERED_BY.stringValue().equals(relation)) {
            return SpatialOperation.IsWithin;
        } else if (GEOF.EH_COVERS.stringValue().equals(relation)) {
            return SpatialOperation.Contains;
        return null;

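    /**
     * Returns the lucene hit with the given id of the respective lucene query
     *
     * @param docId
     *        the id of the document to return
     * @param fieldsToLoad
     *        the names of the stored fields to load, or null to load all of them
     * @return the requested hit, or null if it fails
     */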
    public synchronized Document getDocument(int docId, Set<String> fieldsToLoad) {
        try {
            return readDocument(getIndexReader(), docId, fieldsToLoad);
        } catch (CorruptIndexException e) {
            logger.error("The index seems to be corrupted:", e);
            return null;
        } catch (IOException e) {
            logger.error("Could not read from index:", e);
            return null;

    public synchronized String getSnippet(String fieldName, String text, Highlighter highlighter) {
        String snippet;
        try {
            TokenStream tokenStream = getAnalyzer().tokenStream(fieldName, new StringReader(text));
            snippet = highlighter.getBestFragments(tokenStream, text, 2, "...");
        } catch (Exception e) {
            logger.error("Exception while getting snippet for field " + fieldName, e);
            snippet = null;
        return snippet;

    /**
     * Clears the index. Fails with a SailException when the index has been closed.
     * NOTE(review): several statements are missing in this view (see the notes below), so the exact
     * sequence of closing and re-creating the writer cannot be confirmed from here.
     *
     * @throws IOException
     *         if the writer cannot be opened
     */
    public synchronized void clear() throws IOException {
        if (closed.get()) {
            throw new SailException("Index has been closed");
        // clear
        // the old IndexReaders/Searchers are not outdated
        // NOTE(review): the statement guarded by this if is missing in this view.
        if (indexWriter != null)

        // create new writer
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriter = new IndexWriter(directory, indexWriterConfig);
        // NOTE(review): as visible here the new writer is dropped immediately without being used or
        // closed; presumably statements between these two lines are missing - confirm.
        indexWriter = null;


    // Lucene helper methods

    private static boolean isDeleted(IndexReader reader, int docId) {
        if (reader.hasDeletions()) {
            List<LeafReaderContext> leaves = reader.leaves();
            int size = leaves.size();
            for (int i = 0; i < size; i++) {
                Bits liveDocs = leaves.get(i).reader().getLiveDocs();
                if (docId < liveDocs.length()) {
                    boolean isDeleted = !liveDocs.get(docId);
                    if (isDeleted) {
                        return true;
            return false;
        } else {
            return false;

    private static Document readDocument(IndexReader reader, int docId, Set<String> fieldsToLoad)
            throws IOException {
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad);
        reader.document(docId, visitor);
        return visitor.getDocument();

    /**
     * StoredFieldVisitor that rebuilds a Document from the stored string fields it is shown, optionally
     * restricted to a set of field names.
     */
    static class DocumentStoredFieldVisitor extends StoredFieldVisitor {

        // Names of the fields to load; null means every field is loaded.
        private final Set<String> fieldsToLoad;

        // Collects the fields visited so far.
        private final Document document = new Document();

        DocumentStoredFieldVisitor(Set<String> fieldsToLoad) {
            this.fieldsToLoad = fieldsToLoad;

        // Answers YES for every field when fieldsToLoad is null.
        // NOTE(review): the argument of contains(...) is missing in this view; presumably the name
        // of the field - confirm against the full file.
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            return (fieldsToLoad == null || fieldsToLoad.contains( ? Status.YES : Status.NO;

        // Decodes the stored bytes as UTF-8 and re-adds the value through the add*Field helper that
        // matches the field name; any other name is treated as a predicate field.
        public void stringField(FieldInfo fieldInfo, byte[] value) {
            final String stringValue = new String(value, StandardCharsets.UTF_8);
            // NOTE(review): the initializer of name is missing in this view; presumably the name of
            // the field - confirm against the full file.
            String name =;
            if (SearchFields.ID_FIELD_NAME.equals(name)) {
                addIDField(stringValue, document);
            } else if (SearchFields.CONTEXT_FIELD_NAME.equals(name)) {
                addContextField(stringValue, document);
            } else if (SearchFields.URI_FIELD_NAME.equals(name)) {
                addResourceField(stringValue, document);
            } else if (SearchFields.TEXT_FIELD_NAME.equals(name)) {
                addTextField(stringValue, document);
            } else {
                addPredicateField(name, stringValue, document);

        // Returns the document assembled from the visited fields.
        Document getDocument() {
            return document;