org.apache.solr.search.TestRecoveryHdfs.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.solr.search.TestRecoveryHdfs.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search;

import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.cloud.hdfs.HdfsBasicDistributedZk2Test;
import org.apache.solr.cloud.hdfs.HdfsTestUtil;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.update.HdfsUpdateLog;
import org.apache.solr.update.UpdateHandler;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase;
import org.apache.solr.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.noggit.ObjectBuilder;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;

@ThreadLeakScope(Scope.NONE) // hdfs mini cluster currently leaks threads
// TODO: longer term this should be combined with TestRecovery somehow ??
public class TestRecoveryHdfs extends SolrTestCaseJ4 {

    // means that we've seen the leader and have version info (i.e. we are a non-leader replica)
    private static String FROM_LEADER = DistribPhase.FROMLEADER.toString();

    private static int timeout = 60; // acquire timeout in seconds.  change this to a huge number when debugging to prevent threads from advancing.

    private static MiniDFSCluster dfsCluster;

    private static String hdfsUri;

    private static FileSystem fs;

    @BeforeClass
    public static void beforeClass() throws Exception {
        dfsCluster = HdfsTestUtil.setupClass(
                new File(TEMP_DIR, HdfsBasicDistributedZk2Test.class.getName() + "_" + System.currentTimeMillis())
                        .getAbsolutePath());
        hdfsUri = dfsCluster.getFileSystem().getUri().toString();

        try {
            URI uri = new URI(hdfsUri);
            fs = FileSystem.newInstance(uri, new Configuration());
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }

        hdfsDataDir = hdfsUri + "/solr/shard1";
        System.setProperty("solr.data.dir", hdfsUri + "/solr/shard1");
        System.setProperty("solr.ulog.dir", hdfsUri + "/solr/shard1");

        initCore("solrconfig-tlog.xml", "schema15.xml");
    }

    @AfterClass
    public static void afterClass() throws Exception {
        System.clearProperty("solr.ulog.dir");
        System.clearProperty("solr.data.dir");
        System.clearProperty("test.build.data");
        System.clearProperty("test.cache.data");
        deleteCore();
        IOUtils.closeQuietly(fs);
        fs = null;
        HdfsTestUtil.teardownClass(dfsCluster);

        hdfsDataDir = null;
        dfsCluster = null;
    }

    // since we make up fake versions in these tests, we can get messed up by a DBQ with a real version
    // since Solr can think following updates were reordered.
    @Override
    public void clearIndex() {
        try {
            deleteByQueryAndGetVersion("*:*",
                    params("_version_", Long.toString(-Long.MAX_VALUE), DISTRIB_UPDATE_PARAM, FROM_LEADER));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    @Test
    public void testLogReplay() throws Exception {
        try {

            DirectUpdateHandler2.commitOnClose = false;
            final Semaphore logReplay = new Semaphore(0);
            final Semaphore logReplayFinish = new Semaphore(0);

            UpdateLog.testing_logReplayHook = new Runnable() {
                @Override
                public void run() {
                    try {
                        assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            };

            UpdateLog.testing_logReplayFinishHook = new Runnable() {
                @Override
                public void run() {
                    logReplayFinish.release();
                }
            };

            clearIndex();
            assertU(commit());

            Deque<Long> versions = new ArrayDeque<Long>();
            versions.addFirst(addAndGetVersion(sdoc("id", "A1"), null));
            versions.addFirst(addAndGetVersion(sdoc("id", "A11"), null));
            versions.addFirst(addAndGetVersion(sdoc("id", "A12"), null));
            versions.addFirst(deleteByQueryAndGetVersion("id:A11", null));
            versions.addFirst(addAndGetVersion(sdoc("id", "A13"), null));

            assertJQ(req("q", "*:*"), "/response/numFound==0");

            assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

            h.close();
            createCore();
            // Solr should kick this off now
            // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();

            // verify that previous close didn't do a commit
            // recovery should be blocked by our hook
            assertJQ(req("q", "*:*"), "/response/numFound==0");

            // make sure we can still access versions after a restart
            assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

            // unblock recovery
            logReplay.release(1000);

            // make sure we can still access versions during recovery
            assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

            // wait until recovery has finished
            assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));

            assertJQ(req("q", "*:*"), "/response/numFound==3");

            // make sure we can still access versions after recovery
            assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions);

            assertU(adoc("id", "A2"));
            assertU(adoc("id", "A3"));
            assertU(delI("A2"));
            assertU(adoc("id", "A4"));

            assertJQ(req("q", "*:*"), "/response/numFound==3");

            h.close();
            createCore();
            // Solr should kick this off now
            // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();

            // wait until recovery has finished
            assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
            assertJQ(req("q", "*:*"), "/response/numFound==5");
            assertJQ(req("q", "id:A2"), "/response/numFound==0");

            // no updates, so insure that recovery does not run
            h.close();
            int permits = logReplay.availablePermits();
            createCore();
            // Solr should kick this off now
            // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();

            assertJQ(req("q", "*:*"), "/response/numFound==5");
            Thread.sleep(100);
            assertEquals(permits, logReplay.availablePermits()); // no updates, so insure that recovery didn't run

            assertEquals(UpdateLog.State.ACTIVE, h.getCore().getUpdateHandler().getUpdateLog().getState());

        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;
        }

    }

    @Test
    public void testBuffering() throws Exception {

        DirectUpdateHandler2.commitOnClose = false;
        final Semaphore logReplay = new Semaphore(0);
        final Semaphore logReplayFinish = new Semaphore(0);

        UpdateLog.testing_logReplayHook = new Runnable() {
            @Override
            public void run() {
                try {
                    assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };

        UpdateLog.testing_logReplayFinishHook = new Runnable() {
            @Override
            public void run() {
                logReplayFinish.release();
            }
        };

        SolrQueryRequest req = req();
        UpdateHandler uhandler = req.getCore().getUpdateHandler();
        UpdateLog ulog = uhandler.getUpdateLog();

        try {
            clearIndex();
            assertU(commit());

            assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
            ulog.bufferUpdates();
            assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
            Future<UpdateLog.RecoveryInfo> rinfoFuture = ulog.applyBufferedUpdates();
            assertTrue(rinfoFuture == null);
            assertEquals(UpdateLog.State.ACTIVE, ulog.getState());

            ulog.bufferUpdates();
            assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

            // simulate updates from a leader
            updateJ(jsonAdd(sdoc("id", "B1", "_version_", "1010")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "B11", "_version_", "1015")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonDelQ("id:B1 id:B11 id:B2 id:B3"),
                    params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-1017"));
            updateJ(jsonAdd(sdoc("id", "B2", "_version_", "1020")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "B3", "_version_", "1030")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            deleteAndGetVersion("B1", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-2010"));

            assertJQ(req("qt", "/get", "getVersions", "6"), "=={'versions':[-2010,1030,1020,-1017,1015,1010]}");

            assertU(commit());

            assertJQ(req("qt", "/get", "getVersions", "6"), "=={'versions':[-2010,1030,1020,-1017,1015,1010]}");

            // updates should be buffered, so we should not see any results yet.
            assertJQ(req("q", "*:*"), "/response/numFound==0");

            // real-time get should also not show anything (this could change in the future,
            // but it's currently used for validating version numbers too, so it would
            // be bad for updates to be visible if we're just buffering.
            assertJQ(req("qt", "/get", "id", "B3"), "=={'doc':null}");

            rinfoFuture = ulog.applyBufferedUpdates();
            assertTrue(rinfoFuture != null);

            assertEquals(UpdateLog.State.APPLYING_BUFFERED, ulog.getState());

            logReplay.release(1000);

            UpdateLog.RecoveryInfo rinfo = rinfoFuture.get();
            assertEquals(UpdateLog.State.ACTIVE, ulog.getState());

            assertJQ(req("qt", "/get", "getVersions", "6"), "=={'versions':[-2010,1030,1020,-1017,1015,1010]}");

            assertJQ(req("q", "*:*"), "/response/numFound==2");

            // move back to recovering
            ulog.bufferUpdates();
            assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

            Long ver = getVer(req("qt", "/get", "id", "B3"));
            assertEquals(1030L, ver.longValue());

            // add a reordered doc that shouldn't overwrite one in the index
            updateJ(jsonAdd(sdoc("id", "B3", "_version_", "3")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            // reorder two buffered updates
            updateJ(jsonAdd(sdoc("id", "B4", "_version_", "1040")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            deleteAndGetVersion("B4", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-940")); // this update should not take affect
            updateJ(jsonAdd(sdoc("id", "B6", "_version_", "1060")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "B5", "_version_", "1050")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "B8", "_version_", "1080")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            // test that delete by query is at least buffered along with everything else so it will delete the
            // currently buffered id:8 (even if it doesn't currently support versioning)
            updateJ("{\"delete\": { \"query\":\"id:B2 OR id:B8\" }}",
                    params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-3000"));

            assertJQ(req("qt", "/get", "getVersions", "13"),
                    "=={'versions':[-3000,1080,1050,1060,-940,1040,3,-2010,1030,1020,-1017,1015,1010]}" // the "3" appears because versions aren't checked while buffering
            );

            logReplay.drainPermits();
            rinfoFuture = ulog.applyBufferedUpdates();
            assertTrue(rinfoFuture != null);
            assertEquals(UpdateLog.State.APPLYING_BUFFERED, ulog.getState());

            // apply a single update
            logReplay.release(1);

            // now add another update
            updateJ(jsonAdd(sdoc("id", "B7", "_version_", "1070")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            // a reordered update that should be dropped
            deleteAndGetVersion("B5", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-950"));

            deleteAndGetVersion("B6", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-2060"));

            logReplay.release(1000);
            UpdateLog.RecoveryInfo recInfo = rinfoFuture.get();

            assertJQ(req("q", "*:*", "sort", "id asc", "fl", "id,_version_"),
                    "/response/docs==[" + "{'id':'B3','_version_':1030}" + ",{'id':'B4','_version_':1040}"
                            + ",{'id':'B5','_version_':1050}" + ",{'id':'B7','_version_':1070}" + "]");

            assertEquals(1, recInfo.deleteByQuery);

            assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state
        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;

            req().close();
        }

    }

    @Test
    @Ignore("HDFS-3107: no truncate support yet")
    public void testDropBuffered() throws Exception {

        DirectUpdateHandler2.commitOnClose = false;
        final Semaphore logReplay = new Semaphore(0);
        final Semaphore logReplayFinish = new Semaphore(0);

        UpdateLog.testing_logReplayHook = new Runnable() {
            @Override
            public void run() {
                try {
                    assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };

        UpdateLog.testing_logReplayFinishHook = new Runnable() {
            @Override
            public void run() {
                logReplayFinish.release();
            }
        };

        SolrQueryRequest req = req();
        UpdateHandler uhandler = req.getCore().getUpdateHandler();
        UpdateLog ulog = uhandler.getUpdateLog();

        try {
            clearIndex();
            assertU(commit());

            assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
            ulog.bufferUpdates();
            assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
            Future<UpdateLog.RecoveryInfo> rinfoFuture = ulog.applyBufferedUpdates();
            assertTrue(rinfoFuture == null);
            assertEquals(UpdateLog.State.ACTIVE, ulog.getState());

            ulog.bufferUpdates();
            assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

            // simulate updates from a leader
            updateJ(jsonAdd(sdoc("id", "C1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            assertTrue(ulog.dropBufferedUpdates());
            ulog.bufferUpdates();
            updateJ(jsonAdd(sdoc("id", "C4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            logReplay.release(1000);
            rinfoFuture = ulog.applyBufferedUpdates();
            UpdateLog.RecoveryInfo rinfo = rinfoFuture.get();
            assertEquals(2, rinfo.adds);

            assertJQ(req("qt", "/get", "getVersions", "2"), "=={'versions':[105,104]}");

            // this time add some docs first before buffering starts (so tlog won't be at pos 0)
            updateJ(jsonAdd(sdoc("id", "C100", "_version_", "200")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C101", "_version_", "201")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            ulog.bufferUpdates();
            updateJ(jsonAdd(sdoc("id", "C103", "_version_", "203")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C104", "_version_", "204")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            assertTrue(ulog.dropBufferedUpdates());
            ulog.bufferUpdates();
            updateJ(jsonAdd(sdoc("id", "C105", "_version_", "205")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C106", "_version_", "206")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            rinfoFuture = ulog.applyBufferedUpdates();
            rinfo = rinfoFuture.get();
            assertEquals(2, rinfo.adds);

            assertJQ(req("q", "*:*", "sort", "_version_ asc", "fl", "id,_version_"),
                    "/response/docs==[" + "{'id':'C4','_version_':104}" + ",{'id':'C5','_version_':105}"
                            + ",{'id':'C100','_version_':200}" + ",{'id':'C101','_version_':201}"
                            + ",{'id':'C105','_version_':205}" + ",{'id':'C106','_version_':206}" + "]");

            assertJQ(req("qt", "/get", "getVersions", "6"), "=={'versions':[206,205,201,200,105,104]}");

            ulog.bufferUpdates();
            assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
            updateJ(jsonAdd(sdoc("id", "C301", "_version_", "998")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C302", "_version_", "999")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            assertTrue(ulog.dropBufferedUpdates());

            // make sure we can overwrite with a lower version
            // TODO: is this functionality needed?
            updateJ(jsonAdd(sdoc("id", "C301", "_version_", "301")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "C302", "_version_", "302")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            assertU(commit());

            assertJQ(req("qt", "/get", "getVersions", "2"), "=={'versions':[302,301]}");

            assertJQ(req("q", "*:*", "sort", "_version_ desc", "fl", "id,_version_", "rows", "2"),
                    "/response/docs==[" + "{'id':'C302','_version_':302}" + ",{'id':'C301','_version_':301}" + "]");

            updateJ(jsonAdd(sdoc("id", "C2", "_version_", "302")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state
        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;

            req().close();
        }

    }

    @Test
    public void testBufferingFlags() throws Exception {

        DirectUpdateHandler2.commitOnClose = false;
        final Semaphore logReplayFinish = new Semaphore(0);

        UpdateLog.testing_logReplayFinishHook = new Runnable() {
            @Override
            public void run() {
                logReplayFinish.release();
            }
        };

        SolrQueryRequest req = req();
        UpdateHandler uhandler = req.getCore().getUpdateHandler();
        UpdateLog ulog = uhandler.getUpdateLog();

        try {
            clearIndex();
            assertU(commit());

            assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
            ulog.bufferUpdates();

            // simulate updates from a leader
            updateJ(jsonAdd(sdoc("id", "Q1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "Q2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "Q3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            assertEquals(UpdateLog.State.BUFFERING, ulog.getState());

            req.close();
            h.close();
            createCore();

            req = req();
            uhandler = req.getCore().getUpdateHandler();
            ulog = uhandler.getUpdateLog();

            logReplayFinish.acquire(); // wait for replay to finish

            assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0); // since we died while buffering, we should see this last

            //
            // Try again to ensure that the previous log replay didn't wipe out our flags
            //

            req.close();
            h.close();
            createCore();

            req = req();
            uhandler = req.getCore().getUpdateHandler();
            ulog = uhandler.getUpdateLog();

            assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0);

            // now do some normal non-buffered adds
            updateJ(jsonAdd(sdoc("id", "Q4", "_version_", "114")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "Q5", "_version_", "115")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "Q6", "_version_", "116")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            assertU(commit());

            req.close();
            h.close();
            createCore();

            req = req();
            uhandler = req.getCore().getUpdateHandler();
            ulog = uhandler.getUpdateLog();

            assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0);

            ulog.bufferUpdates();
            // simulate receiving no updates
            ulog.applyBufferedUpdates();
            updateJ(jsonAdd(sdoc("id", "Q7", "_version_", "117")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); // do another add to make sure flags are back to normal

            req.close();
            h.close();
            createCore();

            req = req();
            uhandler = req.getCore().getUpdateHandler();
            ulog = uhandler.getUpdateLog();

            assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0); // check flags on Q7

            logReplayFinish.acquire();
            assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state
        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;

            req().close();
        }

    }

    // make sure that on a restart, versions don't start too low
    @Test
    public void testVersionsOnRestart() throws Exception {
        clearIndex();
        assertU(commit());

        assertU(adoc("id", "D1", "val_i", "1"));
        assertU(adoc("id", "D2", "val_i", "1"));
        assertU(commit());
        long v1 = getVer(req("q", "id:D1"));
        long v1a = getVer(req("q", "id:D2"));

        h.close();
        createCore();

        assertU(adoc("id", "D1", "val_i", "2"));
        assertU(commit());
        long v2 = getVer(req("q", "id:D1"));

        assert (v2 > v1);

        assertJQ(req("qt", "/get", "getVersions", "2"), "/versions==[" + v2 + "," + v1a + "]");

    }

    // make sure that log isn't needlessly replayed after a clean shutdown
    @Test
    public void testCleanShutdown() throws Exception {
        DirectUpdateHandler2.commitOnClose = true;
        final Semaphore logReplay = new Semaphore(0);
        final Semaphore logReplayFinish = new Semaphore(0);

        UpdateLog.testing_logReplayHook = new Runnable() {
            @Override
            public void run() {
                try {
                    assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };

        UpdateLog.testing_logReplayFinishHook = new Runnable() {
            @Override
            public void run() {
                logReplayFinish.release();
            }
        };

        SolrQueryRequest req = req();
        UpdateHandler uhandler = req.getCore().getUpdateHandler();
        UpdateLog ulog = uhandler.getUpdateLog();

        try {
            clearIndex();
            assertU(commit());

            assertU(adoc("id", "E1", "val_i", "1"));
            assertU(adoc("id", "E2", "val_i", "1"));

            // set to a high enough number so this test won't hang on a bug
            logReplay.release(10);

            h.close();
            createCore();

            // make sure the docs got committed
            assertJQ(req("q", "*:*"), "/response/numFound==2");

            // make sure no replay happened
            assertEquals(10, logReplay.availablePermits());

        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;

            req().close();
        }
    }

    private void addDocs(int nDocs, int start, LinkedList<Long> versions) throws Exception {
        for (int i = 0; i < nDocs; i++) {
            versions.addFirst(addAndGetVersion(sdoc("id", Integer.toString(start + nDocs)), null));
        }
    }

    @Test
    public void testRemoveOldLogs() throws Exception {
        try {
            DirectUpdateHandler2.commitOnClose = false;
            final Semaphore logReplay = new Semaphore(0);
            final Semaphore logReplayFinish = new Semaphore(0);

            UpdateLog.testing_logReplayHook = new Runnable() {
                @Override
                public void run() {
                    try {
                        assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            };

            UpdateLog.testing_logReplayFinishHook = new Runnable() {
                @Override
                public void run() {
                    logReplayFinish.release();
                }
            };

            clearIndex();
            assertU(commit());

            String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

            h.close();

            String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
            for (String file : files) {
                fs.delete(new Path(logDir, file), false);
            }

            assertEquals(0, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

            createCore();

            int start = 0;
            int maxReq = 50;

            LinkedList<Long> versions = new LinkedList<Long>();
            addDocs(10, start, versions);
            start += 10;
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));
            assertU(commit());
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));

            addDocs(10, start, versions);
            start += 10;
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));
            assertU(commit());
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));

            assertEquals(2, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

            addDocs(105, start, versions);
            start += 105;
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));
            assertU(commit());
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));

            // previous two logs should be gone now
            assertEquals(1, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

            addDocs(1, start, versions);
            start += 1;
            h.close();
            createCore(); // trigger recovery, make sure that tlog reference handling is correct

            // test we can get versions while replay is happening
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));

            logReplay.release(1000);
            assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));

            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));

            addDocs(105, start, versions);
            start += 105;
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));
            assertU(commit());
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));

            // previous logs should be gone now
            assertEquals(1, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);

            //
            // test that a corrupt tlog file doesn't stop us from coming up, or seeing versions before that tlog file.
            //
            addDocs(1, start, new LinkedList<Long>()); // don't add this to the versions list because we are going to lose it...
            h.close();
            files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
            ;
            Arrays.sort(files);

            FSDataOutputStream dos = fs.create(new Path(new Path(logDir), files[files.length - 1]), (short) 1);
            dos.writeUTF("This is a trashed log file that really shouldn't work at all, but we'll see..");
            dos.close();

            ignoreException("Failure to open existing");
            createCore();
            // we should still be able to get the list of versions (not including the trashed log file)
            assertJQ(req("qt", "/get", "getVersions", "" + maxReq),
                    "/versions==" + versions.subList(0, Math.min(maxReq, start)));
            resetExceptionIgnores();

        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;
        }
    }

    //
    // test that a partially written last tlog entry (that will cause problems for both reverse reading and for
    // log replay) doesn't stop us from coming up, and from recovering the documents that were not cut off.
    //

    @Test
    public void testTruncatedLog() throws Exception {
        try {
            DirectUpdateHandler2.commitOnClose = false;
            final Semaphore logReplay = new Semaphore(0);
            final Semaphore logReplayFinish = new Semaphore(0);

            UpdateLog.testing_logReplayHook = new Runnable() {
                @Override
                public void run() {
                    try {
                        assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            };

            UpdateLog.testing_logReplayFinishHook = new Runnable() {
                @Override
                public void run() {
                    logReplayFinish.release();
                }
            };

            String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

            clearIndex();
            assertU(commit());

            assertU(adoc("id", "F1"));
            assertU(adoc("id", "F2"));
            assertU(adoc("id", "F3"));

            h.close();

            String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
            Arrays.sort(files);

            FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1]));

            dos.writeLong(0xffffffffffffffffL);
            dos.writeChars(
                    "This should be appended to a good log file, representing a bad partially written record.");
            dos.close();

            logReplay.release(1000);
            logReplayFinish.drainPermits();
            ignoreException("OutOfBoundsException"); // this is what the corrupted log currently produces... subject to change.
            createCore();
            assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
            resetExceptionIgnores();
            assertJQ(req("q", "*:*"), "/response/numFound==3");

            //
            // Now test that the bad log file doesn't mess up retrieving latest versions
            //

            updateJ(jsonAdd(sdoc("id", "F4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "F5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "F6", "_version_", "106")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            // This currently skips the bad log file and also returns the version of the clearIndex (del *:*)
            // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
            assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[106,105,104]");

        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;
        }
    }

    //
    // test that a corrupt tlog doesn't stop us from coming up
    //
    @Test
    public void testCorruptLog() throws Exception {
        try {
            DirectUpdateHandler2.commitOnClose = false;

            String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

            clearIndex();
            assertU(commit());

            assertU(adoc("id", "G1"));
            assertU(adoc("id", "G2"));
            assertU(adoc("id", "G3"));

            h.close();

            String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
            Arrays.sort(files);

            FSDataOutputStream dos = fs.create(new Path(logDir, files[files.length - 1]), (short) 1);
            dos.write(new byte[(int) 800]); // zero out file
            dos.close();

            ignoreException("Failure to open existing log file"); // this is what the corrupted log currently produces... subject to change.
            createCore();
            resetExceptionIgnores();

            // just make sure it responds
            assertJQ(req("q", "*:*"), "/response/numFound==0");

            //
            // Now test that the bad log file doesn't mess up retrieving latest versions
            //

            updateJ(jsonAdd(sdoc("id", "G4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "G5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));
            updateJ(jsonAdd(sdoc("id", "G6", "_version_", "106")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER));

            // This currently skips the bad log file and also returns the version of the clearIndex (del *:*)
            // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
            assertJQ(req("qt", "/get", "getVersions", "3"), "/versions==[106,105,104]");

            assertU(commit());

            assertJQ(req("q", "*:*"), "/response/numFound==3");

            // This messes up some other tests (on windows) if we don't remove the bad log.
            // This *should* hopefully just be because the tests are too fragile and not because of real bugs - but it should be investigated further.
            deleteLogs();

        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;
        }
    }

    // in rare circumstances, two logs can be left uncapped (lacking a commit at the end signifying that all the content in the log was committed)
    @Test
    public void testRecoveryMultipleLogs() throws Exception {
        try {
            DirectUpdateHandler2.commitOnClose = false;
            final Semaphore logReplay = new Semaphore(0);
            final Semaphore logReplayFinish = new Semaphore(0);

            UpdateLog.testing_logReplayHook = new Runnable() {
                @Override
                public void run() {
                    try {
                        assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            };

            UpdateLog.testing_logReplayFinishHook = new Runnable() {
                @Override
                public void run() {
                    logReplayFinish.release();
                }
            };

            String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

            clearIndex();
            assertU(commit());

            assertU(adoc("id", "AAAAAA"));
            assertU(adoc("id", "BBBBBB"));
            assertU(adoc("id", "CCCCCC"));

            h.close();
            String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
            Arrays.sort(files);
            String fname = files[files.length - 1];

            FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1]));
            dos.writeLong(0xffffffffffffffffL);
            dos.writeChars(
                    "This should be appended to a good log file, representing a bad partially written record.");
            dos.close();

            FSDataInputStream dis = fs.open(new Path(logDir, files[files.length - 1]));
            byte[] content = new byte[(int) dis.available()];

            dis.readFully(content);

            dis.close();

            // Now make a newer log file with just the IDs changed.  NOTE: this may not work if log format changes too much!
            findReplace("AAAAAA".getBytes("UTF-8"), "aaaaaa".getBytes("UTF-8"), content);
            findReplace("BBBBBB".getBytes("UTF-8"), "bbbbbb".getBytes("UTF-8"), content);
            findReplace("CCCCCC".getBytes("UTF-8"), "cccccc".getBytes("UTF-8"), content);

            // WARNING... assumes format of .00000n where n is less than 9
            long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1));
            String fname2 = String.format(Locale.ROOT, UpdateLog.LOG_FILENAME_PATTERN, UpdateLog.TLOG_NAME,
                    logNumber + 1);

            dos = fs.create(new Path(logDir, fname2), (short) 1);
            dos.write(content);
            dos.close();

            logReplay.release(1000);
            logReplayFinish.drainPermits();
            ignoreException("OutOfBoundsException"); // this is what the corrupted log currently produces... subject to change.
            createCore();
            assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
            resetExceptionIgnores();
            assertJQ(req("q", "*:*"), "/response/numFound==6");

        } finally {
            DirectUpdateHandler2.commitOnClose = true;
            UpdateLog.testing_logReplayHook = null;
            UpdateLog.testing_logReplayFinishHook = null;
        }
    }

    // NOTE: replacement must currently be same size
    private static void findReplace(byte[] from, byte[] to, byte[] data) {
        int idx = -from.length;
        for (;;) {
            idx = indexOf(from, data, idx + from.length); // skip over previous match
            if (idx < 0)
                break;
            for (int i = 0; i < to.length; i++) {
                data[idx + i] = to[i];
            }
        }
    }

    private static int indexOf(byte[] target, byte[] data, int start) {
        outer: for (int i = start; i < data.length - target.length; i++) {
            for (int j = 0; j < target.length; j++) {
                if (data[i + j] != target[j])
                    continue outer;
            }
            return i;
        }
        return -1;
    }

    // stops the core, removes the transaction logs, restarts the core.
    void deleteLogs() throws Exception {
        String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

        h.close();

        try {
            String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
            for (String file : files) {
                //new File(logDir, file).delete();
                fs.delete(new Path(logDir, file), false);
            }

            assertEquals(0, HdfsUpdateLog.getLogList(fs, new Path(logDir)).length);
        } finally {
            // make sure we create the core again, even if the assert fails so it won't mess
            // up the next test.
            createCore();
            assertJQ(req("q", "*:*"), "/response/numFound=="); // ensure it works
        }
    }

    private static Long getVer(SolrQueryRequest req) throws Exception {
        String response = JQ(req);
        Map rsp = (Map) ObjectBuilder.fromJSON(response);
        Map doc = null;
        if (rsp.containsKey("doc")) {
            doc = (Map) rsp.get("doc");
        } else if (rsp.containsKey("docs")) {
            List lst = (List) rsp.get("docs");
            if (lst.size() > 0) {
                doc = (Map) lst.get(0);
            }
        } else if (rsp.containsKey("response")) {
            Map responseMap = (Map) rsp.get("response");
            List lst = (List) responseMap.get("docs");
            if (lst.size() > 0) {
                doc = (Map) lst.get(0);
            }
        }

        if (doc == null)
            return null;

        return (Long) doc.get("_version_");
    }
}