org.apache.solr.cloud.RollingRestartTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.solr.cloud.RollingRestartTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.cloud;

import org.apache.commons.collections.CollectionUtils;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.KeeperException;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
 * Assigns the "overseer" role to a set of nodes (the designates), then performs
 * one or more rolling restarts of those nodes and checks after every stop and
 * every start that one of the designates ends up as the overseer leader.
 */
public class RollingRestartTest extends AbstractFullDistribZkTestBase {
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Upper bound, in nanoseconds, for a single wait on a designate becoming overseer leader. */
    private static final long MAX_WAIT_TIME = TimeUnit.NANOSECONDS.convert(300, TimeUnit.SECONDS);

    /** Window, in nanoseconds, that is (re)started whenever a new overseer leader is observed. */
    private static final long LEADER_CHANGE_WINDOW = TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS);

    /** ZooKeeper path whose sorted children are dumped when no overseer leader can be found. */
    private static final String OVERSEER_ELECTION_PATH = "/overseer_elect/election";

    public RollingRestartTest() {
        sliceCount = 2;
        fixShardCount(TEST_NIGHTLY ? 16 : 2);
    }

    @Override
    public void distribSetUp() throws Exception {
        super.distribSetUp();
        useFactory("solr.StandardDirectoryFactory");
    }

    @Test
    public void test() throws Exception {
        waitForRecoveriesToFinish(false);

        restartWithRolesTest();

        waitForRecoveriesToFinish(false);
    }

    /**
     * Picks random nodes as overseer designates, waits for one of them to become
     * the overseer leader, and then stops and restarts every chosen node for a
     * random number of rounds, failing the test whenever no designate is leader
     * after a stop (if any designate is still live) or after a start.
     *
     * @throws Exception if any cluster operation fails
     */
    public void restartWithRolesTest() throws Exception {
        String leader = OverseerCollectionConfigSetProcessor.getLeaderNode(stateReaderZkClient());
        assertNotNull(leader);
        log.info("Current overseer leader = {}", leader);

        stateReaderZkClient().printLayoutToStdOut();

        int numDesignateOverseers = TEST_NIGHTLY ? 16 : 2;
        numDesignateOverseers = Math.max(getShardCount(), numDesignateOverseers);
        List<String> designates = new ArrayList<>();
        List<CloudJettyRunner> designateJettys = new ArrayList<>();
        // Nodes are drawn with replacement, so the same node can appear more than once
        // in both lists and will then be restarted more than once per round.
        for (int i = 0; i < numDesignateOverseers; i++) {
            int n = random().nextInt(getShardCount());
            String nodeName = cloudJettys.get(n).nodeName;
            log.info("Chose {} as overseer designate", nodeName);
            new CollectionAdminRequest.AddRole().setRole("overseer").setNode(nodeName).process(cloudClient);
            designates.add(nodeName);
            designateJettys.add(cloudJettys.get(n));
        }

        waitUntilOverseerDesignateIsLeader(stateReaderZkClient(), designates, MAX_WAIT_TIME);

        stateReaderZkClient().printLayoutToStdOut();

        boolean sawLiveDesignate = false;
        int numRestarts = 1 + random().nextInt(TEST_NIGHTLY ? 12 : 2);
        for (int i = 0; i < numRestarts; i++) {
            int restartNumber = i + 1; // 1-based, used consistently in every message below
            log.info("Rolling restart #{}", restartNumber);
            for (CloudJettyRunner cloudJetty : designateJettys) {
                log.info("Restarting {}", cloudJetty);
                chaosMonkey.stopJetty(cloudJetty);
                // A designate can only be expected to lead if at least one of them is still up.
                if (anyDesignateLive(designates)) {
                    sawLiveDesignate = true;
                    failUnlessDesignateBecomesLeader(designates,
                            "No overseer designate as leader found after restart #" + restartNumber + ": ");
                }
                assertTrue("Unable to restart (#" + restartNumber + "): " + cloudJetty,
                        ChaosMonkey.start(cloudJetty.jetty));
                failUnlessDesignateBecomesLeader(designates,
                        "No overseer leader found after restart #" + restartNumber + ": ");

                // Accumulate rather than overwrite: the flag records whether a live
                // designate was EVER seen, so a later false must not erase an earlier true.
                sawLiveDesignate |= anyDesignateLive(designates);
            }
        }

        assertTrue("Test may not be working if we never saw a live designate", sawLiveDesignate);

        leader = OverseerCollectionConfigSetProcessor.getLeaderNode(stateReaderZkClient());
        assertNotNull(leader);
        log.info("Current overseer leader (after restart) = {}", leader);

        stateReaderZkClient().printLayoutToStdOut();
    }

    /** Returns the ZooKeeper client of the cloud client's state reader, looked up afresh on each call. */
    private SolrZkClient stateReaderZkClient() {
        return cloudClient.getZkStateReader().getZkClient();
    }

    /** Refreshes the live-node list and reports whether at least one designate is among the live nodes. */
    private boolean anyDesignateLive(List<String> designates) throws Exception {
        cloudClient.getZkStateReader().updateLiveNodes();
        return !CollectionUtils
                .intersection(cloudClient.getZkStateReader().getClusterState().getLiveNodes(), designates)
                .isEmpty();
    }

    /**
     * Waits up to {@link #MAX_WAIT_TIME} for a designate to become overseer leader and fails
     * the test with {@code failurePrefix} followed by the current leader if that does not happen.
     */
    private void failUnlessDesignateBecomesLeader(List<String> designates, String failurePrefix)
            throws Exception {
        if (waitUntilOverseerDesignateIsLeader(stateReaderZkClient(), designates, MAX_WAIT_TIME)) {
            return;
        }
        String leader = OverseerCollectionConfigSetProcessor.getLeaderNode(stateReaderZkClient());
        if (leader == null) {
            // No leader at all: dump the election queue to help diagnose the failure.
            log.error("NOOVERSEER election queue is :{}", OverseerCollectionConfigSetProcessor
                    .getSortedElectionNodes(stateReaderZkClient(), OVERSEER_ELECTION_PATH));
        }
        fail(failurePrefix + leader);
    }

    /**
     * Polls the overseer leader until it is one of the given designates on two consecutive
     * successful checks (the second taken after a 2 second pause), or until time runs out.
     *
     * <p>Two deadlines apply: an overall one of {@code timeoutInNanos} from the start of the
     * call, and a 60 second window that is restarted every time a new, non-null leader is
     * observed. Polling stops as soon as either deadline has passed.
     *
     * @param testZkClient       ZooKeeper client used to look up the current overseer leader
     * @param overseerDesignates node names that are acceptable as overseer leader
     * @param timeoutInNanos     overall maximum wait, in nanoseconds
     * @return {@code true} if a designate was seen as leader on two checks, {@code false} on timeout
     * @throws KeeperException      if the leader lookup fails
     * @throws InterruptedException if the thread is interrupted while waiting
     */
    static boolean waitUntilOverseerDesignateIsLeader(SolrZkClient testZkClient, List<String> overseerDesignates,
            long timeoutInNanos) throws KeeperException, InterruptedException {
        long now = System.nanoTime();
        long maxTimeout = now + timeoutInNanos; // the maximum amount of time we're willing to wait to see the designate as leader
        long timeout = now + LEADER_CHANGE_WINDOW;
        boolean firstTime = true;
        int stableCheckTimeout = 2000;
        String oldleader = null;
        // nanoTime values must be compared by difference, not directly, to stay correct on overflow.
        while (System.nanoTime() - timeout < 0 && System.nanoTime() - maxTimeout < 0) {
            String newLeader = OverseerCollectionConfigSetProcessor.getLeaderNode(testZkClient);
            if (newLeader != null && !newLeader.equals(oldleader)) {
                // the leaders have changed, let's move the timeout further
                timeout = System.nanoTime() + LEADER_CHANGE_WINDOW;
                log.info("oldLeader={} newLeader={} - Advancing timeout to: {}", oldleader, newLeader, timeout);
                oldleader = newLeader;
            }
            if (!overseerDesignates.contains(newLeader)) {
                Thread.sleep(500);
            } else if (firstTime) {
                // First sighting of a designate as leader: pause and check once more before trusting it.
                firstTime = false;
                Thread.sleep(stableCheckTimeout);
            } else {
                return true;
            }
        }
        // Only report the overall deadline when it is the one that actually expired.
        if (System.nanoTime() - maxTimeout >= 0) {
            log.error("Max wait time exceeded");
        }
        return false;
    }
}