org.apache.lucene.index.DirectoryReader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.index.DirectoryReader.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.lucene.search.SearcherManager; // javadocs
import org.apache.lucene.store.Directory;

/** DirectoryReader is an implementation of {@link CompositeReader}
 that can read indexes in a {@link Directory}. 
    
 <p>DirectoryReader instances are usually constructed with a call to
 one of the static <code>open()</code> methods, e.g. {@link
 #open(Directory)}.
    
 <p> For efficiency, in this API documents are often referred to via
 <i>document numbers</i>, non-negative integers which each name a unique
 document in the index.  These document numbers are ephemeral -- they may change
 as documents are added to and deleted from an index.  Clients should thus not
 rely on a given document having the same number between sessions.
    
 <p>
 <a name="thread-safety"></a><p><b>NOTE</b>: {@link
 IndexReader} instances are completely thread
 safe, meaning multiple threads can call any of its methods,
 concurrently.  If your application requires external
 synchronization, you should <b>not</b> synchronize on the
 <code>IndexReader</code> instance; use your own
 (non-Lucene) objects instead.
*/
public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {

    /** The index directory. */
    protected final Directory directory;

    /** Returns a IndexReader reading the index in the given
     *  Directory
     * @param directory the index directory
     * @throws IOException if there is a low-level IO error
     */
    public static DirectoryReader open(final Directory directory) throws IOException {
        return open(directory, Collections.emptyMap());
    }

    /** Returns a IndexReader reading the index in the given
     *  Directory
     * @param directory the index directory
     * @param readerAttributes the reader attributes passed to the {@link org.apache.lucene.codecs.Codec} layer of the
     *                         directory reader. This attribute map is forwarded to all leaf readers as well as to the readers
     *                         that are opened subsequently via the different flavors of {@link DirectoryReader#openIfChanged(DirectoryReader)}
     * @throws IOException if there is a low-level IO error
     */
    public static DirectoryReader open(final Directory directory, final Map<String, String> readerAttributes)
            throws IOException {
        return StandardDirectoryReader.open(directory, null, readerAttributes);
    }

    /**
     * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
     *
     * @param writer The IndexWriter to open from
     * @return The new IndexReader
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     *
     * @see #openIfChanged(DirectoryReader,IndexWriter,boolean)
     *
     * @lucene.experimental
     */
    public static DirectoryReader open(final IndexWriter writer) throws IOException {
        return open(writer, true, false);
    }

    /**
     * Expert: open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter},
     * controlling whether past deletions should be applied.
     *
     * @param writer The IndexWriter to open from
     * @param applyAllDeletes If true, all buffered deletes will
     * be applied (made visible) in the returned reader.  If
     * false, the deletes are not applied but remain buffered
     * (in IndexWriter) so that they will be applied in the
     * future.  Applying deletes can be costly, so if your app
     * can tolerate deleted documents being returned you might
     * gain some performance by passing false.
     * @param writeAllDeletes If true, new deletes will be written
     * down to index files instead of carried over from writer to
     * reader directly in heap
     *
     * @see #open(IndexWriter)
     *
     * @lucene.experimental
     */
    public static DirectoryReader open(final IndexWriter writer, boolean applyAllDeletes, boolean writeAllDeletes)
            throws IOException {
        return writer.getReader(applyAllDeletes, writeAllDeletes);
    }

    /** Expert: returns an IndexReader reading the index in the given
     *  {@link IndexCommit}.
     * @param commit the commit point to open
     * @throws IOException if there is a low-level IO error
     */
    public static DirectoryReader open(final IndexCommit commit) throws IOException {
        return open(commit, Collections.emptyMap());
    }

    /** Expert: returns an IndexReader reading the index in the given
     *  {@link IndexCommit}.
     * @param commit the commit point to open
     * @param readerAttributes the reader attributes passed to the {@link org.apache.lucene.codecs.Codec} layer of the
     *                         directory reader. This attribute map is forwarded to all leaf readers as well as to the readers
     *                         that are opened subsequently via the different flavors of {@link DirectoryReader#openIfChanged(DirectoryReader)}
     * @throws IOException if there is a low-level IO error
     */
    public static DirectoryReader open(final IndexCommit commit, Map<String, String> readerAttributes)
            throws IOException {
        return StandardDirectoryReader.open(commit.getDirectory(), commit, readerAttributes);
    }

    /**
     * If the index has changed since the provided reader was
     * opened, open and return a new reader; else, return
     * null.  The new reader, if not null, will be the same
     * type of reader as the previous one, ie an NRT reader
     * will open a new NRT reader, a MultiReader will open a
     * new MultiReader,  etc.
     *
     * <p>This method is typically far less costly than opening a
     * fully new <code>DirectoryReader</code> as it shares
     * resources (for example sub-readers) with the provided
     * <code>DirectoryReader</code>, when possible.
     *
     * <p>The provided reader is not closed (you are responsible
     * for doing so); if a new reader is returned you also
     * must eventually close it.  Be sure to never close a
     * reader while other threads are still using it; see
     * {@link SearcherManager} to simplify managing this.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     * @return null if there are no changes; else, a new
     * DirectoryReader instance which you must eventually close
     */
    public static DirectoryReader openIfChanged(DirectoryReader oldReader) throws IOException {
        final DirectoryReader newReader = oldReader.doOpenIfChanged();
        assert newReader != oldReader;
        return newReader;
    }

    /**
     * If the IndexCommit differs from what the
     * provided reader is searching, open and return a new
     * reader; else, return null.
     *
     * @see #openIfChanged(DirectoryReader)
     */
    public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexCommit commit) throws IOException {
        final DirectoryReader newReader = oldReader.doOpenIfChanged(commit);
        assert newReader != oldReader;
        return newReader;
    }

    /**
     * Expert: If there changes (committed or not) in the
     * {@link IndexWriter} versus what the provided reader is
     * searching, then open and return a new
     * IndexReader searching both committed and uncommitted
     * changes from the writer; else, return null (though, the
     * current implementation never returns null).
     *
     * <p>This provides "near real-time" searching, in that
     * changes made during an {@link IndexWriter} session can be
     * quickly made available for searching without closing
     * the writer nor calling {@link IndexWriter#commit}.
     *
     * <p>It's <i>near</i> real-time because there is no hard
     * guarantee on how quickly you can get a new reader after
     * making changes with IndexWriter.  You'll have to
     * experiment in your situation to determine if it's
     * fast enough.  As this is a new and experimental
     * feature, please report back on your findings so we can
     * learn, improve and iterate.</p>
     *
     * <p>The very first time this method is called, this
     * writer instance will make every effort to pool the
     * readers that it opens for doing merges, applying
     * deletes, etc.  This means additional resources (RAM,
     * file descriptors, CPU time) will be consumed.</p>
     *
     * <p>For lower latency on reopening a reader, you should
     * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
     * pre-warm a newly merged segment before it's committed
     * to the index.  This is important for minimizing
     * index-to-search delay after a large merge.  </p>
     *
     * <p>If an addIndexes* call is running in another thread,
     * then this reader will only search those segments from
     * the foreign index that have been successfully copied
     * over, so far.</p>
     *
     * <p><b>NOTE</b>: Once the writer is closed, any
     * outstanding readers may continue to be used.  However,
     * if you attempt to reopen any of those readers, you'll
     * hit an {@link org.apache.lucene.store.AlreadyClosedException}.</p>
     *
     * @return DirectoryReader that covers entire index plus all
     * changes made so far by this IndexWriter instance, or
     * null if there are no new changes
     *
     * @param writer The IndexWriter to open from
     *
     * @throws IOException if there is a low-level IO error
     *
     * @lucene.experimental
     */
    public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexWriter writer) throws IOException {
        return openIfChanged(oldReader, writer, true);
    }

    /**
     * Expert: Opens a new reader, if there are any changes, controlling whether past deletions should be applied.
     *
     * @see #openIfChanged(DirectoryReader,IndexWriter)
     *
     * @param writer The IndexWriter to open from
     *
     * @param applyAllDeletes If true, all buffered deletes will
     * be applied (made visible) in the returned reader.  If
     * false, the deletes are not applied but remain buffered
     * (in IndexWriter) so that they will be applied in the
     * future.  Applying deletes can be costly, so if your app
     * can tolerate deleted documents being returned you might
     * gain some performance by passing false.
     *
     * @throws IOException if there is a low-level IO error
     *
     * @lucene.experimental
     */
    public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexWriter writer,
            boolean applyAllDeletes) throws IOException {
        final DirectoryReader newReader = oldReader.doOpenIfChanged(writer, applyAllDeletes);
        assert newReader != oldReader;
        return newReader;
    }

    /** Returns all commit points that exist in the Directory.
     *  Normally, because the default is {@link
     *  KeepOnlyLastCommitDeletionPolicy}, there would be only
     *  one commit point.  But if you're using a custom {@link
     *  IndexDeletionPolicy} then there could be many commits.
     *  Once you have a given commit, you can open a reader on
     *  it by calling {@link DirectoryReader#open(IndexCommit)}
     *  There must be at least one commit in
     *  the Directory, else this method throws {@link
     *  IndexNotFoundException}.  Note that if a commit is in
     *  progress while this method is running, that commit
     *  may or may not be returned.
     *  
     *  @return a sorted list of {@link IndexCommit}s, from oldest 
     *  to latest. */
    public static List<IndexCommit> listCommits(Directory dir) throws IOException {
        final String[] files = dir.listAll();

        List<IndexCommit> commits = new ArrayList<>();

        SegmentInfos latest = SegmentInfos.readLatestCommit(dir);
        final long currentGen = latest.getGeneration();

        commits.add(new StandardDirectoryReader.ReaderCommit(null, latest, dir));

        for (int i = 0; i < files.length; i++) {

            final String fileName = files[i];

            if (fileName.startsWith(IndexFileNames.SEGMENTS) && !fileName.equals(IndexFileNames.OLD_SEGMENTS_GEN)
                    && SegmentInfos.generationFromSegmentsFileName(fileName) < currentGen) {

                SegmentInfos sis = null;
                try {
                    // IOException allowed to throw there, in case
                    // segments_N is corrupt
                    sis = SegmentInfos.readCommit(dir, fileName);
                } catch (FileNotFoundException | NoSuchFileException fnfe) {
                    // LUCENE-948: on NFS (and maybe others), if
                    // you have writers switching back and forth
                    // between machines, it's very likely that the
                    // dir listing will be stale and will claim a
                    // file segments_X exists when in fact it
                    // doesn't.  So, we catch this and handle it
                    // as if the file does not exist
                }

                if (sis != null) {
                    commits.add(new StandardDirectoryReader.ReaderCommit(null, sis, dir));
                }
            }
        }

        // Ensure that the commit points are sorted in ascending order.
        Collections.sort(commits);

        return commits;
    }

    /**
     * Returns <code>true</code> if an index likely exists at
     * the specified directory.  Note that if a corrupt index
     * exists, or if an index in the process of committing 
     * @param  directory the directory to check for an index
     * @return <code>true</code> if an index exists; <code>false</code> otherwise
     */
    public static boolean indexExists(Directory directory) throws IOException {
        // LUCENE-2812, LUCENE-2727, LUCENE-4738: this logic will
        // return true in cases that should arguably be false,
        // such as only IW.prepareCommit has been called, or a
        // corrupt first commit, but it's too deadly to make
        // this logic "smarter" and risk accidentally returning
        // false due to various cases like file description
        // exhaustion, access denied, etc., because in that
        // case IndexWriter may delete the entire index.  It's
        // safer to err towards "index exists" than try to be
        // smart about detecting not-yet-fully-committed or
        // corrupt indices.  This means that IndexWriter will
        // throw an exception on such indices and the app must
        // resolve the situation manually:
        String[] files = directory.listAll();

        String prefix = IndexFileNames.SEGMENTS + "_";
        for (String file : files) {
            if (file.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Expert: Constructs a {@code DirectoryReader} on the given subReaders.
     * @param segmentReaders the wrapped atomic index segment readers. This array is
     * returned by {@link #getSequentialSubReaders} and used to resolve the correct
     * subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
     * cloned and not protected for modification outside of this reader.
     * Subclasses of {@code DirectoryReader} should take care to not allow
     * modification of this internal array, e.g. {@link #doOpenIfChanged()}.
     */
    protected DirectoryReader(Directory directory, LeafReader[] segmentReaders) throws IOException {
        super(segmentReaders);
        this.directory = directory;
    }

    /** Returns the directory this index resides in. */
    public final Directory directory() {
        // Don't ensureOpen here -- in certain cases, when a
        // cloned/reopened reader needs to commit, it may call
        // this method on the closed original reader
        return directory;
    }

    /** Implement this method to support {@link #openIfChanged(DirectoryReader)}.
     * If this reader does not support reopen, return {@code null}, so
     * client code is happy. This should be consistent with {@link #isCurrent}
     * (should always return {@code true}) if reopen is not supported.
     * @throws IOException if there is a low-level IO error
     * @return null if there are no changes; else, a new
     * DirectoryReader instance.
     */
    protected abstract DirectoryReader doOpenIfChanged() throws IOException;

    /** Implement this method to support {@link #openIfChanged(DirectoryReader,IndexCommit)}.
     * If this reader does not support reopen from a specific {@link IndexCommit},
     * throw {@link UnsupportedOperationException}.
     * @throws IOException if there is a low-level IO error
     * @return null if there are no changes; else, a new
     * DirectoryReader instance.
     */
    protected abstract DirectoryReader doOpenIfChanged(final IndexCommit commit) throws IOException;

    /** Implement this method to support {@link #openIfChanged(DirectoryReader,IndexWriter,boolean)}.
     * If this reader does not support reopen from {@link IndexWriter},
     * throw {@link UnsupportedOperationException}.
     * @throws IOException if there is a low-level IO error
     * @return null if there are no changes; else, a new
     * DirectoryReader instance.
     */
    protected abstract DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes)
            throws IOException;

    /**
     * Version number when this IndexReader was opened.
     *
     * <p>This method
     * returns the version recorded in the commit that the
     * reader opened.  This version is advanced every time
     * a change is made with {@link IndexWriter}.</p>
     */
    public abstract long getVersion();

    /**
     * Check whether any new changes have occurred to the
     * index since this reader was opened.
     *
     * <p>If this reader was created by calling {@link #open},  
     * then this method checks if any further commits 
     * (see {@link IndexWriter#commit}) have occurred in the 
     * directory.</p>
     *
     * <p>If instead this reader is a near real-time reader
     * (ie, obtained by a call to {@link
     * DirectoryReader#open(IndexWriter)}, or by calling {@link #openIfChanged}
     * on a near real-time reader), then this method checks if
     * either a new commit has occurred, or any new
     * uncommitted changes have taken place via the writer.
     * Note that even if the writer has only performed
     * merging, this method will still return false.</p>
     *
     * <p>In any event, if this returns false, you should call
     * {@link #openIfChanged} to get a new reader that sees the
     * changes.</p>
     *
     * @throws IOException           if there is a low-level IO error
     */
    public abstract boolean isCurrent() throws IOException;

    /**
     * Expert: return the IndexCommit that this reader has opened.
     * @lucene.experimental
     */
    public abstract IndexCommit getIndexCommit() throws IOException;

}