org.apache.lucene.index.SegmentReader.java Source code

Introduction

Here is the source code for org.apache.lucene.index.SegmentReader.java
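
SegmentReader has package-private constructors, so instances are normally handed out by DirectoryReader or IndexWriter rather than constructed directly. Below is a minimal sketch of how the per-segment readers of an on-disk index are typically reached; the index path is a placeholder:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentReaderDemo {
    public static void main(String[] args) throws Exception {
        // Open an existing on-disk index; "/path/to/index" is a placeholder.
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            // Each leaf of a DirectoryReader over an on-disk index is a SegmentReader.
            for (LeafReaderContext ctx : reader.leaves()) {
                if (ctx.reader() instanceof SegmentReader) {
                    SegmentReader sr = (SegmentReader) ctx.reader();
                    System.out.println(sr.getSegmentName() + ": "
                            + sr.numDocs() + " live of " + sr.maxDoc() + " docs");
                }
            }
        }
    }
}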

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;

/**
 * IndexReader implementation over a single segment. 
 * <p>
 * Instances pointing to the same segment (but with different deletes, etc)
 * may share the same core data.
 * @lucene.experimental
 */
public final class SegmentReader extends CodecReader {

    private final SegmentCommitInfo si;
    // This is the original SI that IW uses internally, but it is mutated behind the scenes,
    // so we don't want this SI to be used for anything else. Still, IW needs it to do
    // maintenance and to look up pooled readers, etc.
    private final SegmentCommitInfo originalSi;
    private final LeafMetaData metaData;
    private final Bits liveDocs;
    private final Bits hardLiveDocs;

    // Normally set to si.maxDoc - si.delDocCount, unless we
    // were created as an NRT reader from IW, in which case IW
    // tells us the number of live docs:
    private final int numDocs;

    final SegmentCoreReaders core;
    final SegmentDocValues segDocValues;

    /** True if we are holding RAM-only liveDocs or DV updates, i.e. the SegmentCommitInfo delGen doesn't match our liveDocs. */
    final boolean isNRT;

    final DocValuesProducer docValuesProducer;
    final FieldInfos fieldInfos;

    /**
     * Constructs a new SegmentReader with a new core.
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    SegmentReader(SegmentCommitInfo si, int createdVersionMajor, boolean openedFromWriter, IOContext context,
            Map<String, String> readerAttributes) throws IOException {
        this.si = si.clone();
        this.originalSi = si;
        this.metaData = new LeafMetaData(createdVersionMajor, si.info.getMinVersion(), si.info.getIndexSort());

        // We pull liveDocs/DV updates from disk:
        this.isNRT = false;

        core = new SegmentCoreReaders(si.info.dir, si, openedFromWriter, context, readerAttributes);
        segDocValues = new SegmentDocValues();

        boolean success = false;
        final Codec codec = si.info.getCodec();
        try {
            if (si.hasDeletions()) {
                // NOTE: the bitvector is stored using the regular directory, not cfs
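                // liveDocs has one bit per docID; a set bit means the document is live (not deleted).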
                hardLiveDocs = liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE);
            } else {
                assert si.getDelCount() == 0;
                hardLiveDocs = liveDocs = null;
            }
            numDocs = si.info.maxDoc() - si.getDelCount();

            fieldInfos = initFieldInfos();
            docValuesProducer = initDocValuesProducer();
            assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs);
            success = true;
        } finally {
            // With lock-less commits, it's entirely possible (and
            // fine) to hit a FileNotFound exception above.  In
            // this case, we want to explicitly close any subset
            // of things that were opened so that we don't have to
            // wait for a GC to do so.
            if (!success) {
                doClose();
            }
        }
    }

    /** Create a new SegmentReader sharing the core of a previous
     *  SegmentReader, using the provided liveDocs, and recording
     *  whether those liveDocs were carried in RAM (isNRT=true). */
    SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, Bits hardLiveDocs, int numDocs,
            boolean isNRT) throws IOException {
        if (numDocs > si.info.maxDoc()) {
            throw new IllegalArgumentException("numDocs=" + numDocs + " but maxDoc=" + si.info.maxDoc());
        }
        if (liveDocs != null && liveDocs.length() != si.info.maxDoc()) {
            throw new IllegalArgumentException(
                    "maxDoc=" + si.info.maxDoc() + " but liveDocs.size()=" + liveDocs.length());
        }
        this.si = si.clone();
        this.originalSi = si;
        this.metaData = sr.getMetaData();
        this.liveDocs = liveDocs;
        this.hardLiveDocs = hardLiveDocs;
        assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs);
        this.isNRT = isNRT;
        this.numDocs = numDocs;
        this.core = sr.core;
        core.incRef();
        this.segDocValues = sr.segDocValues;

        boolean success = false;
        try {
            fieldInfos = initFieldInfos();
            docValuesProducer = initDocValuesProducer();
            success = true;
        } finally {
            if (!success) {
                doClose();
            }
        }
    }

    private static boolean assertLiveDocs(boolean isNRT, Bits hardLiveDocs, Bits liveDocs) {
        if (isNRT) {
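            // In the NRT case the liveDocs may carry RAM-only deletions that are not yet on
            // disk, so the two bit sets may legitimately differ; we only check that liveDocs
            // is present whenever hardLiveDocs is.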
            assert hardLiveDocs == null
                    || liveDocs != null : "liveDocs must be non-null if hardLiveDocs is non-null";
        } else {
            assert hardLiveDocs == liveDocs : "non-nrt case must have identical liveDocs";
        }
        return true;
    }

    /**
     * Initialize the most recent DocValues producer for the current commit.
     */
    private DocValuesProducer initDocValuesProducer() throws IOException {

        if (fieldInfos.hasDocValues() == false) {
            return null;
        } else {
            Directory dir;
            if (core.cfsReader != null) {
                dir = core.cfsReader;
            } else {
                dir = si.info.dir;
            }
            if (si.hasFieldUpdates()) {
                return new SegmentDocValuesProducer(si, dir, core.coreFieldInfos, fieldInfos, segDocValues);
            } else {
                // simple case, no DocValues updates
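                // gen -1 is the sentinel for the segment's original (non-updated) DocValues files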
                return segDocValues.getDocValuesProducer(-1L, si, dir, fieldInfos);
            }
        }
    }

    /**
     * Initialize the most recent FieldInfos for the current commit.
     */
    private FieldInfos initFieldInfos() throws IOException {
        if (!si.hasFieldUpdates()) {
            return core.coreFieldInfos;
        } else {
            // updates always outside of CFS
            FieldInfosFormat fisFormat = si.info.getCodec().fieldInfosFormat();
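            // Updated field infos get a per-generation file suffix, encoded in base 36
            // (Character.MAX_RADIX); e.g. fieldInfosGen=35 yields the suffix "z".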
            final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
            return fisFormat.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
        }
    }

    @Override
    public Bits getLiveDocs() {
        ensureOpen();
        return liveDocs;
    }

    @Override
    protected void doClose() throws IOException {
        //System.out.println("SR.close seg=" + si);
        try {
            core.decRef();
        } finally {
            if (docValuesProducer instanceof SegmentDocValuesProducer) {
                segDocValues.decRef(((SegmentDocValuesProducer) docValuesProducer).dvGens);
            } else if (docValuesProducer != null) {
                segDocValues.decRef(Collections.singletonList(-1L));
            }
        }
    }

    @Override
    public FieldInfos getFieldInfos() {
        ensureOpen();
        return fieldInfos;
    }

    @Override
    public int numDocs() {
        // Don't call ensureOpen() here (it could affect performance)
        return numDocs;
    }

    @Override
    public int maxDoc() {
        // Don't call ensureOpen() here (it could affect performance)
        return si.info.maxDoc();
    }

    @Override
    public TermVectorsReader getTermVectorsReader() {
        ensureOpen();
        return core.termVectorsLocal.get();
    }

    @Override
    public StoredFieldsReader getFieldsReader() {
        ensureOpen();
        return core.fieldsReaderLocal.get();
    }

    @Override
    public PointsReader getPointsReader() {
        ensureOpen();
        return core.pointsReader;
    }

    @Override
    public NormsProducer getNormsReader() {
        ensureOpen();
        return core.normsProducer;
    }

    @Override
    public DocValuesProducer getDocValuesReader() {
        ensureOpen();
        return docValuesProducer;
    }

    @Override
    public FieldsProducer getPostingsReader() {
        ensureOpen();
        return core.fields;
    }

    @Override
    public String toString() {
        // SegmentCommitInfo.toString takes the number of *pending*
        // deletions, so we reverse-compute that here:
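        // e.g. maxDoc=100, numDocs=90, delCount=6 on disk -> 4 deletions are still pending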
        return si.toString(si.info.maxDoc() - numDocs - si.getDelCount());
    }

    /**
     * Return the name of the segment this reader is reading.
     */
    public String getSegmentName() {
        return si.info.name;
    }

    /**
     * Return the {@link SegmentCommitInfo} of the segment this reader is reading.
     */
    public SegmentCommitInfo getSegmentInfo() {
        return si;
    }

    /** Returns the directory this index resides in. */
    public Directory directory() {
        // Don't ensureOpen here -- in certain cases, when a
        // cloned/reopened reader needs to commit, it may call
        // this method on the closed original reader
        return si.info.dir;
    }

    private final Set<ClosedListener> readerClosedListeners = new CopyOnWriteArraySet<>();

    @Override
    void notifyReaderClosedListeners() throws IOException {
        synchronized (readerClosedListeners) {
            IOUtils.applyToAll(readerClosedListeners, l -> l.onClose(readerCacheHelper.getKey()));
        }
    }

    private final IndexReader.CacheHelper readerCacheHelper = new IndexReader.CacheHelper() {
        private final IndexReader.CacheKey cacheKey = new IndexReader.CacheKey();

        @Override
        public CacheKey getKey() {
            return cacheKey;
        }

        @Override
        public void addClosedListener(ClosedListener listener) {
            ensureOpen();
            readerClosedListeners.add(listener);
        }
    };

    @Override
    public CacheHelper getReaderCacheHelper() {
        return readerCacheHelper;
    }

    /** Wrap the cache helper of the core to add ensureOpen() calls that make
     *  sure users do not register closed listeners on closed indices. */
    private final IndexReader.CacheHelper coreCacheHelper = new IndexReader.CacheHelper() {

        @Override
        public CacheKey getKey() {
            return core.getCacheHelper().getKey();
        }

        @Override
        public void addClosedListener(ClosedListener listener) {
            ensureOpen();
            core.getCacheHelper().addClosedListener(listener);
        }
    };

    @Override
    public CacheHelper getCoreCacheHelper() {
        return coreCacheHelper;
    }

    @Override
    public LeafMetaData getMetaData() {
        return metaData;
    }

    /**
     * Returns the original SegmentCommitInfo passed to this reader at creation time.
     * {@link #getSegmentInfo()} returns a clone of this instance.
     */
    SegmentCommitInfo getOriginalSegmentInfo() {
        return originalSi;
    }

    /**
     * Returns the live docs that are not hard-deleted. This is an expert API to be used with
     * soft-deletes to filter out documents that are hard-deleted (for instance due to aborted
     * documents), or to distinguish soft-deleted from hard-deleted documents, e.g. a rolled-back
     * tombstone.
     * @lucene.experimental
     */
    public Bits getHardLiveDocs() {
        return hardLiveDocs;
    }
}
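
As a usage note: with soft-deletes, getLiveDocs() reflects both soft and hard deletions, while getHardLiveDocs() reflects only the hard deletions. Below is a minimal sketch (not part of the class above) that counts documents which are soft-deleted but not hard-deleted; SoftDeleteStats and countSoftDeletedOnly are illustrative names, and the reader is assumed to be open:

import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.util.Bits;

final class SoftDeleteStats {
    /** Counts docs in the given open SegmentReader that are soft-deleted but not hard-deleted. */
    static int countSoftDeletedOnly(SegmentReader sr) {
        Bits live = sr.getLiveDocs();         // null means the segment has no deletions at all
        Bits hardLive = sr.getHardLiveDocs(); // null means the segment has no hard deletions
        if (live == null) {
            return 0;
        }
        int count = 0;
        for (int docID = 0; docID < sr.maxDoc(); docID++) {
            boolean isLive = live.get(docID);
            boolean isHardLive = hardLive == null || hardLive.get(docID);
            if (!isLive && isHardLive) {
                count++; // deleted overall, but still hard-live -> soft-deleted only
            }
        }
        return count;
    }
}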