org.apache.lucene.index.RandomIndexWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.index.RandomIndexWriter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.CopyOnWriteArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NullInfoStream;
import org.apache.lucene.util.TestUtil;

/**
 * Silly class that randomizes the indexing experience. E.g. it may
 * swap in a different merge policy/scheduler, may commit periodically,
 * may or may not forceMerge in the end, may flush by doc count instead
 * of RAM, etc.
 */

public class RandomIndexWriter implements Closeable {

    public final IndexWriter w;
    private final Random r;
    int docCount;
    int flushAt;
    private double flushAtFactor = 1.0;
    private boolean getReaderCalled;
    private final Analyzer analyzer; // only if WE created it (then we close it)
    private final double softDeletesRatio;

    /**
     * Creates an {@link IndexWriter} whose test points randomly call {@link Thread#yield()},
     * so that thread scheduling gets mixed up while a test runs.
     *
     * @param dir  directory the writer operates on
     * @param conf writer configuration, handed on to
     *             {@link #mockIndexWriter(Random, Directory, IndexWriterConfig, TestPoint)}
     * @param r    source of randomness; also seeds the generator used by the test point
     * @return the newly created writer
     * @throws IOException if creating the writer fails
     */
    public static IndexWriter mockIndexWriter(Directory dir, IndexWriterConfig conf, Random r) throws IOException {
        // The test point draws from its own generator, seeded once from r.
        final Random yieldRandom = new Random(r.nextLong());
        final TestPoint yieldingTestPoint = message -> {
            if (yieldRandom.nextInt(4) == 2) {
                Thread.yield();
            }
        };
        return mockIndexWriter(r, dir, conf, yieldingTestPoint);
    }

    /**
     * Creates an {@link IndexWriter} with test points enabled and routes its {@code TP}
     * info-stream messages to the given {@link TestPoint}.
     * <p>
     * Randomly, when an index already exists in {@code dir} and the open mode is not
     * {@code CREATE}, the writer is opened from the index commit of a freshly opened reader.
     * That reader is closed again once the writer has been constructed, or construction failed.
     *
     * @param r         source of randomness
     * @param dir       directory the writer operates on
     * @param conf      writer configuration; its info stream is replaced by a wrapping one
     * @param testPoint callback invoked for every {@code TP} message
     * @return the newly created writer
     * @throws IOException if opening the reader, creating the writer or closing the reader fails
     */
    public static IndexWriter mockIndexWriter(Random r, Directory dir, IndexWriterConfig conf, TestPoint testPoint)
            throws IOException {
        conf.setInfoStream(new TestPointInfoStream(conf.getInfoStream(), testPoint));

        // Evaluation order matters: the random draw always happens first.
        final boolean openFromReader = r.nextBoolean() && DirectoryReader.indexExists(dir)
                && conf.getOpenMode() != IndexWriterConfig.OpenMode.CREATE;

        DirectoryReader commitReader = null;
        if (openFromReader) {
            if (LuceneTestCase.VERBOSE) {
                System.out.println("RIW: open writer from reader");
            }
            commitReader = DirectoryReader.open(dir);
            conf.setIndexCommit(commitReader.getIndexCommit());
        }

        final IndexWriter writer;
        try {
            writer = new IndexWriter(dir, conf) {
                @Override
                protected boolean isEnableTestPoints() {
                    return true;
                }
            };
        } catch (Throwable t) {
            // Construction failed: close the reader quietly so the original failure is what surfaces.
            if (commitReader != null) {
                IOUtils.closeWhileHandlingException(commitReader);
            }
            throw t;
        }

        // Construction succeeded: a failure while closing the reader is reported to the caller.
        if (commitReader != null) {
            IOUtils.close(commitReader);
        }
        return writer;
    }

    /**
     * Creates a RandomIndexWriter with a random config built around a new {@link MockAnalyzer}.
     * <p>
     * The analyzer is created here, so this writer is asked to close it ({@code closeAnalyzer}
     * is {@code true}); whether soft deletes are used is decided by a random draw from {@code r}.
     *
     * @param r   source of randomness
     * @param dir directory to index into
     * @throws IOException if the underlying writer cannot be created
     */
    public RandomIndexWriter(Random r, Directory dir) throws IOException {
        this(r, dir, LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r)), true, r.nextBoolean());
    }

    /**
     * Creates a RandomIndexWriter with a random config that uses the given analyzer.
     *
     * @param r   source of randomness
     * @param dir directory to index into
     * @param a   analyzer passed to {@code LuceneTestCase.newIndexWriterConfig}
     * @throws IOException if the underlying writer cannot be created
     */
    public RandomIndexWriter(Random r, Directory dir, Analyzer a) throws IOException {
        this(r, dir, LuceneTestCase.newIndexWriterConfig(r, a));
    }

    /**
     * Creates a RandomIndexWriter with the provided config.
     * <p>
     * Whether soft deletes are used is decided by a random draw from {@code r}; the config's
     * analyzer is not closed by this writer ({@code closeAnalyzer} is {@code false}).
     *
     * @param r   source of randomness
     * @param dir directory to index into
     * @param c   writer configuration to use
     * @throws IOException if the underlying writer cannot be created
     */
    public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException {
        this(r, dir, c, false, r.nextBoolean());
    }

    /**
     * Creates a RandomIndexWriter with the provided config and an explicit soft-deletes choice.
     * <p>
     * The config's analyzer is not closed by this writer ({@code closeAnalyzer} is {@code false}).
     *
     * @param r              source of randomness
     * @param dir            directory to index into
     * @param c              writer configuration to use
     * @param useSoftDeletes whether a soft-deletes field is configured on {@code c}
     * @throws IOException if the underlying writer cannot be created
     */
    public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c, boolean useSoftDeletes)
            throws IOException {
        this(r, dir, c, false, useSoftDeletes);
    }

    /**
     * Shared constructor behind all public ones.
     *
     * @param r              source of randomness; a private generator is seeded from it, and it
     *                       is also used directly for the remaining draws in this constructor
     * @param dir            directory to index into
     * @param c              writer configuration; gets a soft-deletes field set when
     *                       {@code useSoftDeletes} is {@code true}
     * @param closeAnalyzer  whether {@link #close()} should also close the writer's analyzer
     * @param useSoftDeletes whether updates may randomly be executed as soft updates
     * @throws IOException if the underlying writer cannot be created
     */
    private RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c, boolean closeAnalyzer,
            boolean useSoftDeletes) throws IOException {
        // TODO: this should be solved in a different way; Random should not be shared (!).
        this.r = new Random(r.nextLong());
        if (useSoftDeletes) {
            c.setSoftDeletesField("___soft_deletes");
            // Ratio in (0, 1]: one of 1/1, 1/2, ..., 1/10. The parentheses are essential;
            // without them the expression is (1.0 / 1) + nextInt(10), which is always >= 1
            // and would make every update a soft update.
            softDeletesRatio = 1.d / (double) (1 + r.nextInt(10));
        } else {
            softDeletesRatio = 0d;
        }
        w = mockIndexWriter(dir, c, r);
        flushAt = TestUtil.nextInt(r, 10, 1000);
        if (closeAnalyzer) {
            // Remember the analyzer only if WE created it, so close() can close it.
            analyzer = w.getAnalyzer();
        } else {
            analyzer = null;
        }
        if (LuceneTestCase.VERBOSE) {
            System.out.println("RIW dir=" + dir);
        }

        // Make sure we sometimes test indices that don't get
        // any forced merges:
        doRandomForceMerge = !(c.getMergePolicy() instanceof NoMergePolicy) && r.nextBoolean();
    }

    /**
     * Adds a Document. Randomly (one draw in five) the document is added through
     * {@link IndexWriter#addDocuments(Iterable)} as a block containing just this document.
     *
     * @param doc the fields of the document to add
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     * @see IndexWriter#addDocument(Iterable)
     */
    public <T extends IndexableField> long addDocument(final Iterable<T> doc) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
        final long seqNo;
        if (r.nextInt(5) != 3) {
            seqNo = w.addDocument(doc);
        } else {
            // TODO: maybe, we should simply buffer up added docs
            // (but we need to clone them), and only when
            // getReader, commit, etc. are called, we do an
            // addDocuments?  Would be better testing.
            final Iterable<Iterable<T>> singleDocBlock = () -> new Iterator<Iterable<T>>() {

                // flips to true once the one and only document has been handed out
                private boolean consumed;

                @Override
                public boolean hasNext() {
                    return consumed == false;
                }

                @Override
                public Iterable<T> next() {
                    if (consumed) {
                        throw new IllegalStateException();
                    }
                    consumed = true;
                    return doc;
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
            seqNo = w.addDocuments(singleDocBlock);
        }

        maybeFlushOrCommit();

        return seqNo;
    }

    /**
     * Counts one more indexing operation and, when the count hits the current threshold,
     * randomly flushes buffers sequentially, flushes the writer, or commits. Afterwards the
     * threshold is pushed out by a random step that grows over time.
     *
     * @throws IOException if the flush or commit fails
     */
    private void maybeFlushOrCommit() throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());

        // The counter is always advanced, whether or not the threshold was reached.
        final boolean thresholdReached = docCount++ == flushAt;
        if (thresholdReached == false) {
            return;
        }

        if (r.nextBoolean()) {
            flushAllBuffersSequentially();
        } else if (r.nextBoolean()) {
            if (LuceneTestCase.VERBOSE) {
                System.out.println("RIW.add/updateDocument: now doing a flush at docCount=" + docCount);
            }
            w.flush();
        } else {
            if (LuceneTestCase.VERBOSE) {
                System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount);
            }
            w.commit();
        }

        final int minStep = (int) (flushAtFactor * 10);
        final int maxStep = (int) (flushAtFactor * 1000);
        flushAt += TestUtil.nextInt(r, minStep, maxStep);

        // gradually but exponentially increase time b/w flushes
        if (flushAtFactor < 2e6) {
            flushAtFactor *= 1.05;
        }
    }

    /**
     * Flushes a random number of in-memory buffers one after another via
     * {@link IndexWriter#flushNextBuffer()}.
     * <p>
     * At least one and at most "active thread state count" flushes are attempted; the loop
     * stops early as soon as a call reports that nothing was flushed.
     *
     * @throws IOException if flushing a buffer fails
     */
    private void flushAllBuffersSequentially() throws IOException {
        if (LuceneTestCase.VERBOSE) {
            System.out.println("RIW.add/updateDocument: now flushing the largest writer at docCount=" + docCount);
        }
        int activeThreadStateCount = w.docWriter.perThreadPool.getActiveThreadStateCount();
        // Math.max, not Math.min: with min the count could only ever be 0 or 1, which made the
        // loop (and its early exit) pointless and sometimes skipped flushing altogether.
        int numFlushes = Math.max(1, r.nextInt(activeThreadStateCount + 1));
        for (int i = 0; i < numFlushes; i++) {
            if (w.flushNextBuffer() == false) {
                break; // stop once we didn't flush anything
            }
        }
    }

    /**
     * Adds a block of documents through the underlying writer, then possibly flushes or commits.
     *
     * @param docs the documents to add
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.addDocuments(docs);
        maybeFlushOrCommit();
        return result;
    }

    /**
     * Replaces the documents matching {@code delTerm} with the given block. Depending on a
     * random draw this is executed either as a soft update (marking old documents through the
     * configured soft-deletes field) or as a regular update.
     *
     * @param delTerm term identifying the documents to replace
     * @param docs    the replacement documents
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs)
            throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
        final long result = useSoftDeletes()
                ? w.softUpdateDocuments(delTerm, docs,
                        new NumericDocValuesField(w.getConfig().getSoftDeletesField(), 1))
                : w.updateDocuments(delTerm, docs);
        maybeFlushOrCommit();
        return result;
    }

    /**
     * Draws a random number and reports whether the next update should be a soft update.
     *
     * @return {@code true} if the draw falls below {@code softDeletesRatio}
     */
    private boolean useSoftDeletes() {
        final double draw = r.nextDouble();
        return draw < softDeletesRatio;
    }

    /**
     * Updates a document. Two random draws pick the flavor: soft versus regular update, and
     * single-document call versus a one-element block call.
     *
     * @param t   term identifying the document(s) to replace
     * @param doc the fields of the replacement document
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     * @see IndexWriter#updateDocument(Term, Iterable)
     */
    public <T extends IndexableField> long updateDocument(Term t, final Iterable<T> doc) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());

        // Both draws happen on every call, in this order.
        final boolean soft = useSoftDeletes();
        final boolean asBlock = r.nextInt(5) == 3;

        final long seqNo;
        if (soft && asBlock) {
            seqNo = w.softUpdateDocuments(t, Arrays.asList(doc),
                    new NumericDocValuesField(w.getConfig().getSoftDeletesField(), 1));
        } else if (soft) {
            seqNo = w.softUpdateDocument(t, doc,
                    new NumericDocValuesField(w.getConfig().getSoftDeletesField(), 1));
        } else if (asBlock) {
            seqNo = w.updateDocuments(t, Arrays.asList(doc));
        } else {
            seqNo = w.updateDocument(t, doc);
        }
        maybeFlushOrCommit();

        return seqNo;
    }

    /**
     * Adds the indexes from the given directories via the underlying writer.
     *
     * @param dirs directories holding the indexes to add
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long addIndexes(Directory... dirs) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.addIndexes(dirs);
        return result;
    }

    /**
     * Adds the indexes exposed by the given readers via the underlying writer.
     *
     * @param readers readers over the indexes to add
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long addIndexes(CodecReader... readers) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.addIndexes(readers);
        return result;
    }

    /**
     * Updates a numeric doc value via the underlying writer.
     *
     * @param term  term identifying the documents to update
     * @param field name of the doc-values field
     * @param value the new value
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long updateNumericDocValue(Term term, String field, Long value) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.updateNumericDocValue(term, field, value);
        return result;
    }

    /**
     * Updates a binary doc value via the underlying writer.
     *
     * @param term  term identifying the documents to update
     * @param field name of the doc-values field
     * @param value the new value
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long updateBinaryDocValue(Term term, String field, BytesRef value) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.updateBinaryDocValue(term, field, value);
        return result;
    }

    /**
     * Applies the given doc-values updates via the underlying writer.
     *
     * @param term    term identifying the documents to update
     * @param updates the doc-values fields to apply
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long updateDocValues(Term term, Field... updates) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.updateDocValues(term, updates);
        return result;
    }

    /**
     * Deletes the documents matching the given term via the underlying writer.
     *
     * @param term term identifying the documents to delete
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long deleteDocuments(Term term) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.deleteDocuments(term);
        return result;
    }

    /**
     * Deletes the documents matching the given query via the underlying writer.
     *
     * @param q query identifying the documents to delete
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long deleteDocuments(Query q) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final long result = this.w.deleteDocuments(q);
        return result;
    }

    /**
     * Commits, choosing at random (one draw in ten) to flush buffers concurrently while doing so.
     *
     * @return the sequence number returned by {@link #commit(boolean)}
     * @throws IOException if the commit fails
     */
    public long commit() throws IOException {
        final boolean flushConcurrently = r.nextInt(10) == 0;
        return commit(flushConcurrently);
    }

    /**
     * Commits the underlying writer, optionally while a second thread flushes buffers.
     * <p>
     * When {@code flushConcurrently} is set, a helper thread runs
     * {@code flushAllBuffersSequentially()} while this thread commits. The helper is always
     * joined before this method returns or throws. Every failure, whether from the commit, the
     * helper thread or an interrupt while joining, is collected; the first one collected is
     * thrown with the others attached as suppressed exceptions.
     *
     * @param flushConcurrently whether to flush buffers from a second thread during the commit
     * @return the sequence number returned by {@link IndexWriter#commit()}
     * @throws IOException if the commit or the concurrent flush fails with an I/O error
     */
    public long commit(boolean flushConcurrently) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
        if (flushConcurrently == false) {
            return w.commit();
        }

        List<Throwable> throwableList = new CopyOnWriteArrayList<>();
        Thread thread = new Thread(() -> {
            try {
                flushAllBuffersSequentially();
            } catch (Throwable e) {
                throwableList.add(e);
            }
        });
        thread.start();

        // Do NOT return from inside the try: an early return would skip the failure check below
        // and silently drop anything the flush thread (or the join) reported.
        long seqNo = -1;
        try {
            seqNo = w.commit();
        } catch (Throwable t) {
            throwableList.add(t);
        } finally {
            try {
                // make sure we wait for the thread to join otherwise it might still be processing events
                // and the IW won't be fully closed in the case of a fatal exception
                thread.join();
            } catch (InterruptedException e) {
                throwableList.add(e);
            }
        }

        if (throwableList.size() != 0) {
            Throwable primary = throwableList.get(0);
            for (int i = 1; i < throwableList.size(); i++) {
                primary.addSuppressed(throwableList.get(i));
            }
            if (primary instanceof IOException) {
                throw (IOException) primary;
            } else if (primary instanceof RuntimeException) {
                throw (RuntimeException) primary;
            } else {
                throw new AssertionError(primary);
            }
        }
        return seqNo;
    }

    /**
     * Returns the document statistics reported by the underlying writer.
     *
     * @return the result of {@link IndexWriter#getDocStats()}
     */
    public IndexWriter.DocStats getDocStats() {
        final IndexWriter.DocStats stats = this.w.getDocStats();
        return stats;
    }

    /**
     * Deletes all documents via the underlying writer.
     *
     * @return the sequence number returned by the underlying writer
     * @throws IOException if the underlying writer fails
     */
    public long deleteAll() throws IOException {
        final long result = this.w.deleteAll();
        return result;
    }

    /**
     * Returns a reader with deletions applied and without writing all deletes; shorthand for
     * {@code getReader(true, false)}.
     *
     * @return a reader over the current state of the index
     * @throws IOException if opening the reader fails
     */
    public DirectoryReader getReader() throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        final boolean applyDeletions = true;
        final boolean writeAllDeletes = false;
        return getReader(applyDeletions, writeAllDeletes);
    }

    private boolean doRandomForceMerge;
    private boolean doRandomForceMergeAssert;

    /**
     * Forces merging of segments with deletions via the underlying writer.
     *
     * @param doWait passed through to {@link IndexWriter#forceMergeDeletes(boolean)}
     * @throws IOException if the underlying writer fails
     */
    public void forceMergeDeletes(boolean doWait) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        this.w.forceMergeDeletes(doWait);
    }

    /**
     * Forces merging of segments with deletions via the underlying writer.
     *
     * @throws IOException if the underlying writer fails
     */
    public void forceMergeDeletes() throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        this.w.forceMergeDeletes();
    }

    /**
     * Enables or disables the random force-merge step.
     *
     * @param v {@code true} to allow random force merges, {@code false} to suppress them
     */
    public void setDoRandomForceMerge(boolean v) {
        this.doRandomForceMerge = v;
    }

    /**
     * Enables or disables the segment-count assertion made after a random partial force merge.
     *
     * @param v {@code true} to assert the resulting segment count, {@code false} to skip the check
     */
    public void setDoRandomForceMergeAssert(boolean v) {
        this.doRandomForceMergeAssert = v;
    }

    /**
     * If random force merging is enabled, performs one randomly chosen action: a full force
     * merge down to one segment, a partial force merge to a random segment limit, or a
     * force merge of deletes.
     *
     * @throws IOException if the merge fails
     */
    private void doRandomForceMerge() throws IOException {
        if (doRandomForceMerge == false) {
            return;
        }

        final int numSegments = w.getSegmentCount();

        // full forceMerge (always taken when there are no segments)
        if (r.nextBoolean() || numSegments == 0) {
            if (LuceneTestCase.VERBOSE) {
                System.out.println("RIW: doRandomForceMerge(1)");
            }
            w.forceMerge(1);
            return;
        }

        // partial forceMerge
        if (r.nextBoolean()) {
            final int maxSegments = TestUtil.nextInt(r, 1, numSegments);
            if (LuceneTestCase.VERBOSE) {
                System.out.println("RIW: doRandomForceMerge(" + maxSegments + ")");
            }
            w.forceMerge(maxSegments);
            // The segment-count check is skipped for TieredMergePolicy unless the limit is 1.
            if (maxSegments == 1 || !(w.getConfig().getMergePolicy() instanceof TieredMergePolicy)) {
                assert !doRandomForceMergeAssert || w.getSegmentCount() <= maxSegments : "limit=" + maxSegments
                        + " actual=" + w.getSegmentCount();
            }
            return;
        }

        if (LuceneTestCase.VERBOSE) {
            System.out.println("RIW: do random forceMergeDeletes()");
        }
        w.forceMergeDeletes();
    }

    /**
     * Returns a reader over the index, randomly choosing how to obtain it.
     * <p>
     * Occasionally a random force merge is run first. Then either a near-real-time reader is
     * taken from the writer (always when {@code applyDeletions} is {@code false}, otherwise on
     * a coin flip), or the writer is committed and a reader is obtained either by opening the
     * directory or from the writer. A reader opened from the directory is wrapped in a
     * {@link SoftDeletesDirectoryReaderWrapper} when a soft-deletes field is configured.
     *
     * @param applyDeletions  passed through to the writer's {@code getReader}
     * @param writeAllDeletes passed through to the writer's {@code getReader}
     * @return a reader over the current state of the index
     * @throws IOException if committing or opening the reader fails
     */
    public DirectoryReader getReader(boolean applyDeletions, boolean writeAllDeletes) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
        getReaderCalled = true;
        if (r.nextInt(20) == 2) {
            doRandomForceMerge();
        }

        // The coin is only flipped when deletions are to be applied.
        final boolean useNrtReader = !applyDeletions || r.nextBoolean();
        if (useNrtReader) {
            // if we have soft deletes we can't open from a directory
            if (LuceneTestCase.VERBOSE) {
                System.out.println("RIW.getReader: use NRT reader");
            }
            if (r.nextInt(5) == 1) {
                w.commit();
            }
            return w.getReader(applyDeletions, writeAllDeletes);
        }

        if (LuceneTestCase.VERBOSE) {
            System.out.println("RIW.getReader: open new reader");
        }
        w.commit();
        if (r.nextBoolean() == false) {
            return w.getReader(applyDeletions, writeAllDeletes);
        }

        final DirectoryReader opened = DirectoryReader.open(w.getDirectory());
        final String softDeletesField = w.getConfig().getSoftDeletesField();
        if (softDeletesField == null) {
            return opened;
        }
        return new SoftDeletesDirectoryReaderWrapper(opened, softDeletesField);
    }

    /**
     * Close this writer, and the analyzer too if this instance holds one.
     * <p>
     * If {@link #getReader()} was never called, a random force merge may run first (followed by
     * a commit when the config does not commit on close). If that preparation fails, the writer
     * and analyzer are still closed, with any failures while closing suppressed.
     *
     * @throws IOException if the preparation or the close itself fails
     * @see IndexWriter#close()
     */
    @Override
    public void close() throws IOException {
        try {
            if (!w.isClosed()) {
                LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
            }
            // if someone isn't using getReader() API, we want to be sure to
            // forceMerge since presumably they might open a reader on the dir.
            if (!getReaderCalled && r.nextInt(8) == 2 && !w.isClosed()) {
                doRandomForceMerge();
                if (!w.getConfig().getCommitOnClose()) {
                    // index may have changed, must commit the changes, or otherwise they are discarded by the call to close()
                    w.commit();
                }
            }
        } catch (Throwable t) {
            // Preparation failed: release resources quietly and let the original failure propagate.
            IOUtils.closeWhileHandlingException(w, analyzer);
            throw t;
        }
        IOUtils.close(w, analyzer);
    }

    /**
     * Forces a forceMerge down to at most the given number of segments.
     * <p>
     * NOTE: this should be avoided in tests unless absolutely necessary,
     * as it will result in less test coverage.
     *
     * @param maxSegmentCount passed through to {@link IndexWriter#forceMerge(int)}
     * @throws IOException if the merge fails
     * @see IndexWriter#forceMerge(int)
     */
    public void forceMerge(int maxSegmentCount) throws IOException {
        LuceneTestCase.maybeChangeLiveIndexWriterConfig(this.r, this.w.getConfig());
        this.w.forceMerge(maxSegmentCount);
    }

    /**
     * {@link InfoStream} that hands every message of the {@code TP} component to a
     * {@link TestPoint} and forwards messages to a delegate stream for the components the
     * delegate has enabled.
     */
    static final class TestPointInfoStream extends InfoStream {
        private final InfoStream delegate;
        private final TestPoint testPoint;

        /**
         * @param delegate  stream to forward to; a {@link NullInfoStream} is used when {@code null}
         * @param testPoint callback invoked for every {@code TP} message
         */
        public TestPointInfoStream(InfoStream delegate, TestPoint testPoint) {
            if (delegate == null) {
                this.delegate = new NullInfoStream();
            } else {
                this.delegate = delegate;
            }
            this.testPoint = testPoint;
        }

        /** Closes the delegate stream. */
        @Override
        public void close() throws IOException {
            delegate.close();
        }

        /** Runs the test point for {@code TP} messages, then forwards to the delegate if it wants them. */
        @Override
        public void message(String component, String message) {
            final boolean isTestPointMessage = "TP".equals(component);
            if (isTestPointMessage) {
                testPoint.apply(message);
            }
            if (delegate.isEnabled(component)) {
                delegate.message(component, message);
            }
        }

        /** The {@code TP} component is always enabled; everything else is up to the delegate. */
        @Override
        public boolean isEnabled(String component) {
            if ("TP".equals(component)) {
                return true;
            }
            return delegate.isEnabled(component);
        }
    }

    /**
     * Writes all in-memory segments to the {@link Directory} by flushing the underlying writer.
     *
     * @throws IOException if the flush fails
     */
    public final void flush() throws IOException {
        this.w.flush();
    }

    /**
     * Simple callback that is executed for each {@code TP} {@link InfoStream} component
     * message. See also {@link RandomIndexWriter#mockIndexWriter(Random, Directory, IndexWriterConfig, TestPoint)}.
     * <p>
     * Declared as a functional interface so it can be supplied as a lambda or method reference.
     */
    @FunctionalInterface
    public interface TestPoint {
        /**
         * Invoked with the text of a {@code TP} message.
         *
         * @param message the message text
         */
        void apply(String message);
    }
}