gobblin.cluster.GobblinClusterKillTest.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.cluster.GobblinClusterKillTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.cluster;

import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigValueFactory;
import gobblin.testing.AssertWithBackoff;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.TimeoutException;
import org.apache.commons.io.FileUtils;
import org.apache.curator.test.TestingServer;
import org.apache.hadoop.fs.Path;
import org.apache.helix.HelixManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

/**
 * Unit tests for killing {@link GobblinClusterManager}s and {@link GobblinTaskRunner}s
 *
 * <p>
 *   This class uses a {@link TestingServer} as an embedded ZooKeeper server for testing. The Curator
 *   framework is used to provide a ZooKeeper client. This class also uses the {@link HelixManager} to
 *   act as a testing Helix participant to receive the container (running the {@link GobblinTaskRunner})
 *   shutdown request message.
 * </p>
 */
@Test(groups = { "gobblin.cluster" }, singleThreaded = true)
public class GobblinClusterKillTest {
    public final static Logger LOG = LoggerFactory.getLogger(GobblinClusterKillTest.class);

    private TestingServer _testingZKServer;

    private final static int ASSERT_TIMEOUT = 60000;
    private final static int ASSERT_MAX_SLEEP = 2000;
    private final static int NUM_MANAGERS = 2;
    private final static int NUM_WORKERS = 2;
    private GobblinClusterManager[] _clusterManagers;
    private GobblinTaskRunner[] _clusterWorkers;
    private Thread[] _workerStartThreads;

    private String _testDirPath;
    private String _jobDirPath;
    Config _config;

    /**
     * clean up and set up test directory
     */
    private void setupTestDir() throws IOException {
        _testDirPath = _config.getString("gobblin.cluster.work.dir");
        _jobDirPath = _config.getString(GobblinClusterConfigurationKeys.JOB_CONF_PATH_KEY);

        // clean up test directory and create job dir
        File testDir = new File(_testDirPath);
        File jobDir = new File(_jobDirPath);

        if (testDir.exists()) {
            FileUtils.deleteDirectory(testDir);
        }

        jobDir.mkdirs();

        // copy job file from resource
        String jobFileName = GobblinClusterKillTest.class.getSimpleName() + "Job.conf";
        try (InputStream resourceStream = this.getClass().getClassLoader().getResourceAsStream(jobFileName)) {
            if (resourceStream == null) {
                throw new RuntimeException("Could not find job resource " + jobFileName);
            }
            File targetFile = new File(_jobDirPath + "/" + jobFileName);

            FileUtils.copyInputStreamToFile(resourceStream, targetFile);
        } catch (IOException e) {
            throw new RuntimeException("Unable to load job resource " + jobFileName, e);
        }
    }

    /**
     * Create and start a cluster manager
     * @param id - array offset
     * @throws Exception
     */
    private void setupManager(int id) throws Exception {
        _clusterManagers[id] = new GobblinClusterManager(TestHelper.TEST_APPLICATION_NAME,
                TestHelper.TEST_APPLICATION_ID,
                _config.withValue(GobblinClusterConfigurationKeys.HELIX_INSTANCE_NAME_KEY,
                        ConfigValueFactory.fromAnyRef("Manager_" + id)),
                Optional.of(new Path(_config.getString("gobblin.cluster.work.dir"))));

        _clusterManagers[id].start();
    }

    /**
     * Create and start a cluster worker
     * @param id - array offset
     * @throws Exception
     */
    private void setupWorker(int id) throws Exception {
        final GobblinTaskRunner fworker = new GobblinTaskRunner(TestHelper.TEST_APPLICATION_NAME, "Worker_" + id,
                TestHelper.TEST_APPLICATION_ID, "1", _config,
                Optional.of(new Path(_config.getString("gobblin.cluster.work.dir"))));

        _clusterWorkers[id] = fworker;
        _workerStartThreads[id] = new Thread(new Runnable() {
            @Override
            public void run() {
                fworker.start();
            }
        });
        _workerStartThreads[id].start();
    }

    @BeforeClass
    public void setUp() throws Exception {
        // Use a random ZK port
        _testingZKServer = new TestingServer(-1);
        LOG.info("Testing ZK Server listening on: " + _testingZKServer.getConnectString());

        URL url = GobblinClusterKillTest.class.getClassLoader()
                .getResource(GobblinClusterKillTest.class.getSimpleName() + ".conf");
        Assert.assertNotNull(url, "Could not find resource " + url);

        _config = ConfigFactory.parseURL(url)
                .withValue("gobblin.cluster.zk.connection.string",
                        ConfigValueFactory.fromAnyRef(_testingZKServer.getConnectString()))
                .withValue("gobblin.cluster.jobconf.fullyQualifiedPath",
                        ConfigValueFactory.fromAnyRef("/tmp/gobblinClusterKillTest/job-conf"))
                .resolve();

        String zkConnectionString = _config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
        HelixUtils.createGobblinHelixCluster(zkConnectionString,
                _config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY));

        setupTestDir();

        _clusterManagers = new GobblinClusterManager[NUM_MANAGERS];
        _clusterWorkers = new GobblinTaskRunner[NUM_WORKERS];
        _workerStartThreads = new Thread[NUM_WORKERS];

        for (int i = 0; i < NUM_MANAGERS; i++) {
            setupManager(i);
        }

        for (int i = 0; i < NUM_WORKERS; i++) {
            setupWorker(i);
        }
    }

    // The kill tests are unreliable on Travis
    @Test(groups = { "disabledOnTravis" })
    public void testKillWorker() throws TimeoutException, InterruptedException {
        Collection<File> matches = Collections.EMPTY_LIST;

        final File writerOutputDir = new File(_testDirPath + "/writer-output/gobblin/util/test/HelloWorldSource/");
        final File jobOutputDir = new File(_testDirPath + "/job-output/gobblin/util/test/HelloWorldSource/");
        final File testJobFile = new File(_jobDirPath + "/GobblinClusterKillTestJob.conf");

        // Job file should exist
        Assert.assertTrue(testJobFile.exists());

        AssertWithBackoff.create().logger(LOG).timeoutMs(ASSERT_TIMEOUT).maxSleepMs(ASSERT_MAX_SLEEP)
                .backoffFactor(1.5).assertTrue(new Predicate<Void>() {
                    @Override
                    public boolean apply(Void input) {
                        if (writerOutputDir.exists()) {
                            return FileUtils.listFiles(writerOutputDir, new String[] { "txt" }, true).size() >= 25;
                        } else {
                            return false;
                        }
                    }
                }, "Waiting for writer output");

        LOG.info("{} matches found before disconnecting worker",
                FileUtils.listFiles(writerOutputDir, new String[] { "txt" }, true).size());

        _clusterWorkers[0].disconnectHelixManager();

        AssertWithBackoff.create().logger(LOG).timeoutMs(ASSERT_TIMEOUT).maxSleepMs(ASSERT_MAX_SLEEP)
                .backoffFactor(1.5).assertTrue(new Predicate<Void>() {
                    @Override
                    public boolean apply(Void input) {
                        if (jobOutputDir.exists()) {
                            return FileUtils.listFiles(jobOutputDir, new String[] { "txt" }, true).size() >= 100;
                        } else {
                            return false;
                        }
                    }
                }, "Waiting for job-completion");

        // Job file should have been deleted
        Thread.sleep(5000);
        Assert.assertFalse(testJobFile.exists());
    }

    // The kill tests are unreliable on Travis
    @Test(groups = { "disabledOnTravis" }, dependsOnMethods = "testKillWorker")
    public void testKillManager() throws IOException, TimeoutException, InterruptedException {
        Collection<File> matches = Collections.EMPTY_LIST;
        final File writerOutputDir = new File(_testDirPath + "/writer-output/gobblin/util/test/HelloWorldSource/");
        final File jobOutputDir = new File(_testDirPath + "/job-output/gobblin/util/test/HelloWorldSource/");

        // reinitialize test directory
        setupTestDir();

        // kill a manager to cause leader election. New leader will schedule a new job.
        _clusterManagers[0].disconnectHelixManager();

        AssertWithBackoff.create().logger(LOG).timeoutMs(ASSERT_TIMEOUT).maxSleepMs(ASSERT_MAX_SLEEP)
                .backoffFactor(1.5).assertTrue(new Predicate<Void>() {
                    @Override
                    public boolean apply(Void input) {
                        if (writerOutputDir.exists()) {
                            return FileUtils.listFiles(writerOutputDir, new String[] { "txt" }, true).size() >= 25;
                        } else {
                            return false;
                        }
                    }
                }, "Waiting for writer output");

        AssertWithBackoff.create().logger(LOG).timeoutMs(ASSERT_TIMEOUT).maxSleepMs(ASSERT_MAX_SLEEP)
                .backoffFactor(1.5).assertTrue(new Predicate<Void>() {
                    @Override
                    public boolean apply(Void input) {
                        if (jobOutputDir.exists()) {
                            return FileUtils.listFiles(jobOutputDir, new String[] { "txt" }, true).size() >= 100;
                        } else {
                            return false;
                        }
                    }
                }, "Waiting for job-completion");

        // Job file should have been deleted
        Thread.sleep(5000);
        final File testJobFile = new File(_jobDirPath + "/GobblinClusterKillTestJob.conf");
        Assert.assertFalse(testJobFile.exists());
    }

    // The kill tests are unreliable on Travis
    @Test(groups = { "disabledOnTravis" }, enabled = true, dependsOnMethods = "testKillManager")
    public void testRestartManager() throws IOException, TimeoutException, InterruptedException {
        Collection<File> matches = Collections.EMPTY_LIST;
        final File writerOutputDir = new File(_testDirPath + "/writer-output/gobblin/util/test/HelloWorldSource/");
        final File jobOutputDir = new File(_testDirPath + "/job-output/gobblin/util/test/HelloWorldSource/");

        // reinitialize test directory
        setupTestDir();

        // At this point there is one connected manager. Disconnect it and reconnect the other one to confirm that a manager
        // can continue to function after regaining leadership.
        _clusterManagers[1].disconnectHelixManager();

        // Should function after regaining leadership
        // need to reinitialize the heap manager and call handleLeadershipChange to shut down services in the test
        // since the leadership change is simulated
        _clusterManagers[0].initializeHelixManager();
        _clusterManagers[0].handleLeadershipChange(null);

        // reconnect to get leadership role
        _clusterManagers[0].connectHelixManager();

        AssertWithBackoff.create().logger(LOG).timeoutMs(ASSERT_TIMEOUT).maxSleepMs(ASSERT_MAX_SLEEP)
                .backoffFactor(1.5).assertTrue(new Predicate<Void>() {
                    @Override
                    public boolean apply(Void input) {
                        if (writerOutputDir.exists()) {
                            return FileUtils.listFiles(writerOutputDir, new String[] { "txt" }, true).size() >= 25;
                        } else {
                            return false;
                        }
                    }
                }, "Waiting for writer output");

        AssertWithBackoff.create().logger(LOG).timeoutMs(ASSERT_TIMEOUT).maxSleepMs(ASSERT_MAX_SLEEP)
                .backoffFactor(1.5).assertTrue(new Predicate<Void>() {
                    @Override
                    public boolean apply(Void input) {
                        if (jobOutputDir.exists()) {
                            return FileUtils.listFiles(jobOutputDir, new String[] { "txt" }, true).size() >= 100;
                        } else {
                            return false;
                        }
                    }
                }, "Waiting for job-completion");
    }

    @AfterClass
    public void tearDown() throws IOException, InterruptedException {

        for (int i = 0; i < NUM_MANAGERS; i++) {
            _clusterManagers[i].connectHelixManager();
            if (!_clusterManagers[i].isHelixManagerConnected()) {
                _clusterManagers[i].connectHelixManager();
            }
            _clusterManagers[i].stop();
        }

        for (int i = 0; i < NUM_WORKERS; i++) {
            _clusterWorkers[i].stop();
        }

        for (int i = 0; i < NUM_WORKERS; i++) {
            _workerStartThreads[i].join();
        }

        _testingZKServer.close();
    }
}