gobblin.runtime.mapreduce.MRJobLauncherTest.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.runtime.mapreduce.MRJobLauncherTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.runtime.mapreduce;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

import org.apache.commons.io.FileUtils;
import org.jboss.byteman.contrib.bmunit.BMNGRunner;
import org.jboss.byteman.contrib.bmunit.BMRule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import gobblin.configuration.ConfigurationKeys;
import gobblin.metastore.FsStateStore;
import gobblin.metastore.StateStore;
import gobblin.metastore.testing.ITestMetastoreDatabase;
import gobblin.metastore.testing.TestMetastoreDatabaseFactory;
import gobblin.metrics.GobblinMetrics;
import gobblin.runtime.JobLauncherTestHelper;
import gobblin.runtime.JobState;
import gobblin.util.limiter.BaseLimiterType;
import gobblin.util.limiter.DefaultLimiterFactory;
import gobblin.writer.Destination;
import gobblin.writer.WriterOutputFormat;

/**
 * Unit test for {@link MRJobLauncher}.
 *
 * <p>Each test loads a fresh copy of the job properties via {@link #loadJobProps()}, usually gives the job a
 * test-specific name, runs it through {@link JobLauncherTestHelper} and finally deletes the state store
 * registered under that job name so that tests do not see each other's state.</p>
 */
@Test(groups = { "gobblin.runtime.mapreduce", "gobblin.runtime" }, singleThreaded = true)
public class MRJobLauncherTest extends BMNGRunner {

    private static final Logger LOG = LoggerFactory.getLogger(MRJobLauncherTest.class);

    /** Classpath resource holding the launcher-level properties. */
    private static final String LAUNCHER_PROPS_RESOURCE = "gobblin.mr-test.properties";

    /** Classpath resource holding the job (pull file) properties. */
    private static final String JOB_PROPS_RESOURCE = "mr-job-conf/GobblinMRTest.pull";

    private Properties launcherProps;
    private JobLauncherTestHelper jobLauncherTestHelper;
    private ITestMetastoreDatabase testMetastoreDatabase;

    /**
     * Prepares the shared fixtures: obtains a test metastore database, loads the launcher properties,
     * enables the job history store and metrics (with file reporting switched off), builds the
     * {@link JobLauncherTestHelper} on top of an {@link FsStateStore}, and removes any leftover writer
     * output and staging directories.
     *
     * @throws Exception if any fixture cannot be created or the launcher properties cannot be loaded
     */
    @BeforeClass
    public void startUp() throws Exception {
        LOG.info("startUp: in");
        this.testMetastoreDatabase = TestMetastoreDatabaseFactory.get();

        this.launcherProps = new Properties();
        loadClasspathProperties(LAUNCHER_PROPS_RESOURCE, this.launcherProps);

        this.launcherProps.setProperty(ConfigurationKeys.JOB_HISTORY_STORE_ENABLED_KEY, "true");
        this.launcherProps.setProperty(ConfigurationKeys.METRICS_ENABLED_KEY, "true");
        this.launcherProps.setProperty(ConfigurationKeys.METRICS_REPORTING_FILE_ENABLED_KEY, "false");
        this.launcherProps.setProperty(ConfigurationKeys.JOB_HISTORY_STORE_URL_KEY,
                this.testMetastoreDatabase.getJdbcUrl());

        StateStore<JobState.DatasetState> datasetStateStore = new FsStateStore<>(
                this.launcherProps.getProperty(ConfigurationKeys.STATE_STORE_FS_URI_KEY),
                this.launcherProps.getProperty(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY),
                JobState.DatasetState.class);

        this.jobLauncherTestHelper = new JobLauncherTestHelper(this.launcherProps, datasetStateStore);

        // Other tests may not clean up properly, clean up outputDir and stagingDir or some of these tests might fail.
        String outputDir = this.launcherProps.getProperty(ConfigurationKeys.WRITER_OUTPUT_DIR);
        String stagingDir = this.launcherProps.getProperty(ConfigurationKeys.WRITER_STAGING_DIR);
        FileUtils.deleteDirectory(new File(outputDir));
        FileUtils.deleteDirectory(new File(stagingDir));
        LOG.info("startUp: out");
    }

    /**
     * Runs the plain job once under a test-specific job name.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test
    public void testLaunchJob() throws Exception {
        final Logger log = LoggerFactory.getLogger(getClass().getName() + ".testLaunchJob");
        log.info("in");
        Properties jobProps = loadJobPropsFor("testLaunchJob");
        try {
            this.jobLauncherTestHelper.runTest(jobProps);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
        log.info("out");
    }

    /**
     * Runs the same job three times in a row with the maximum number of mappers set to 2, 3 and 5.
     *
     * @throws Exception if any of the job runs or the state store cleanup fails
     */
    @Test
    public void testLaunchJobWithConcurrencyLimit() throws Exception {
        final Logger log = LoggerFactory.getLogger(getClass().getName() + ".testLaunchJobWithConcurrencyLimit");
        log.info("in");
        Properties jobProps = loadJobPropsFor("testLaunchJobWithConcurrencyLimit");
        // All runs happen inside the try block so the state store is removed even when an early run fails.
        try {
            for (String maxMappers : new String[] { "2", "3", "5" }) {
                jobProps.setProperty(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY, maxMappers);
                this.jobLauncherTestHelper.runTest(jobProps);
            }
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
        log.info("out");
    }

    /**
     * Runs the job with a count-based extract limit enabled and hands the same limit to the helper.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test
    public void testLaunchJobWithPullLimit() throws Exception {
        int limit = 10;
        Properties jobProps = loadJobPropsFor("testLaunchJobWithPullLimit");
        jobProps.setProperty(ConfigurationKeys.EXTRACT_LIMIT_ENABLED_KEY, Boolean.TRUE.toString());
        jobProps.setProperty(DefaultLimiterFactory.EXTRACT_LIMIT_TYPE_KEY, BaseLimiterType.COUNT_BASED.toString());
        // Derive the configured limit from the same variable that is passed to the helper so they cannot drift.
        jobProps.setProperty(DefaultLimiterFactory.EXTRACT_LIMIT_COUNT_LIMIT_KEY, Integer.toString(limit));
        try {
            this.jobLauncherTestHelper.runTestWithPullLimit(jobProps, limit);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
    }

    /**
     * Runs the job with the {@code use.multiworkunit} property set to {@code true}.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test
    public void testLaunchJobWithMultiWorkUnit() throws Exception {
        Properties jobProps = loadJobPropsFor("testLaunchJobWithMultiWorkUnit");
        jobProps.setProperty("use.multiworkunit", Boolean.toString(true));
        try {
            this.jobLauncherTestHelper.runTest(jobProps);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
    }

    /**
     * Runs the helper's cancellation scenario with the unmodified job properties. The test is placed in
     * the {@code ignore} group.
     *
     * @throws Exception if the cancellation scenario fails
     */
    @Test(groups = { "ignore" })
    public void testCancelJob() throws Exception {
        this.jobLauncherTestHelper.runTestWithCancellation(loadJobProps());
    }

    /**
     * Runs the job with two fork branches. Branch-specific properties (suffixes {@code .0} and {@code .1})
     * are set to identical values for both branches; directory properties reuse the un-suffixed values
     * already present in the job properties.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test
    public void testLaunchJobWithFork() throws Exception {
        Properties jobProps = loadJobPropsFor("testLaunchJobWithFork");
        jobProps.setProperty(ConfigurationKeys.CONVERTER_CLASSES_KEY, "gobblin.test.TestConverter2");
        jobProps.setProperty(ConfigurationKeys.FORK_BRANCHES_KEY, "2");
        jobProps.setProperty(ConfigurationKeys.ROW_LEVEL_POLICY_LIST + ".0",
                "gobblin.policies.schema.SchemaRowCheckPolicy");
        jobProps.setProperty(ConfigurationKeys.ROW_LEVEL_POLICY_LIST + ".1",
                "gobblin.policies.schema.SchemaRowCheckPolicy");
        jobProps.setProperty(ConfigurationKeys.ROW_LEVEL_POLICY_LIST_TYPE + ".0", "OPTIONAL");
        jobProps.setProperty(ConfigurationKeys.ROW_LEVEL_POLICY_LIST_TYPE + ".1", "OPTIONAL");
        jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST + ".0",
                "gobblin.policies.count.RowCountPolicy,gobblin.policies.schema.SchemaCompatibilityPolicy");
        jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST + ".1",
                "gobblin.policies.count.RowCountPolicy,gobblin.policies.schema.SchemaCompatibilityPolicy");
        jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST_TYPE + ".0", "OPTIONAL,OPTIONAL");
        jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST_TYPE + ".1", "OPTIONAL,OPTIONAL");
        jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY + ".0", WriterOutputFormat.AVRO.name());
        jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY + ".1", WriterOutputFormat.AVRO.name());
        jobProps.setProperty(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY + ".0",
                Destination.DestinationType.HDFS.name());
        jobProps.setProperty(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY + ".1",
                Destination.DestinationType.HDFS.name());
        jobProps.setProperty(ConfigurationKeys.WRITER_STAGING_DIR + ".0",
                jobProps.getProperty(ConfigurationKeys.WRITER_STAGING_DIR));
        jobProps.setProperty(ConfigurationKeys.WRITER_STAGING_DIR + ".1",
                jobProps.getProperty(ConfigurationKeys.WRITER_STAGING_DIR));
        jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_DIR + ".0",
                jobProps.getProperty(ConfigurationKeys.WRITER_OUTPUT_DIR));
        jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_DIR + ".1",
                jobProps.getProperty(ConfigurationKeys.WRITER_OUTPUT_DIR));
        jobProps.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR + ".0",
                jobProps.getProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR));
        jobProps.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR + ".1",
                jobProps.getProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR));
        try {
            this.jobLauncherTestHelper.runTestWithFork(jobProps);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
    }

    /**
     * Byteman test that ensures the {@link MRJobLauncher} successfully cleans up all staging data even
     * when an exception is thrown in the {@link MRJobLauncher#countersToMetrics(GobblinMetrics)} method.
     * The {@link BMRule} is to inject an {@link IOException} when the
     * {@link MRJobLauncher#countersToMetrics(GobblinMetrics)} method is called.
     *
     * @throws IOException if the job properties cannot be loaded or the state store cannot be deleted
     */
    @Test
    @BMRule(name = "testJobCleanupOnError",
            targetClass = "gobblin.runtime.mapreduce.MRJobLauncher",
            targetMethod = "countersToMetrics(GobblinMetrics)",
            targetLocation = "AT ENTRY",
            condition = "true",
            action = "throw new IOException(\"Exception for testJobCleanupOnError\")")
    public void testJobCleanupOnError() throws IOException {
        Properties props = loadJobProps();
        try {
            this.jobLauncherTestHelper.runTest(props);
            // Assert.fail raises an AssertionError, which is not an Exception and so is not swallowed below.
            Assert.fail("Byteman is not configured properly, the runTest method should have throw an exception");
        } catch (Exception expected) {
            // The job should throw an exception, ignore it
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(props.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }

        Assert.assertTrue(props.containsKey(ConfigurationKeys.WRITER_STAGING_DIR));
        Assert.assertTrue(props.containsKey(ConfigurationKeys.WRITER_OUTPUT_DIR));

        File stagingDir = new File(props.getProperty(ConfigurationKeys.WRITER_STAGING_DIR));
        File outputDir = new File(props.getProperty(ConfigurationKeys.WRITER_OUTPUT_DIR));

        // FileUtils.listFiles rejects a non-existent directory; a directory that is gone holds no leftovers,
        // so both directories are only inspected when they still exist.
        if (stagingDir.exists()) {
            Assert.assertEquals(FileUtils.listFiles(stagingDir, null, true).size(), 0);
        }
        if (outputDir.exists()) {
            Assert.assertEquals(FileUtils.listFiles(outputDir, null, true).size(), 0);
        }
    }

    /**
     * Runs the helper's multiple-datasets scenario under a test-specific job name.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test
    public void testLaunchJobWithMultipleDatasets() throws Exception {
        Properties jobProps = loadJobPropsFor("testLaunchJobWithMultipleDatasets");
        try {
            this.jobLauncherTestHelper.runTestWithMultipleDatasets(jobProps);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
    }

    /**
     * Seems setting mapreduce.map.failures.maxpercent=100 does not prevent Hadoop2's LocalJobRunner from
     * failing and aborting a job if any mapper task fails. Aborting the job causes its working directory
     * to be deleted in {@link GobblinOutputCommitter}, which further fails this test since all the output
     * {@link gobblin.runtime.TaskState}s are deleted. There may be a bug in Hadoop2's LocalJobRunner.
     *
     * Also applicable to the two tests below.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test(groups = { "ignore" })
    public void testLaunchJobWithCommitSuccessfulTasksPolicy() throws Exception {
        Properties jobProps = loadJobPropsFor("testLaunchJobWithCommitSuccessfulTasksPolicy");
        try {
            this.jobLauncherTestHelper.runTestWithCommitSuccessfulTasksPolicy(jobProps);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
    }

    /**
     * Runs the helper's multiple-datasets-with-faulty-extractor scenario, passing {@code false} as the
     * helper's second argument. The test is placed in the {@code ignore} group.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test(groups = { "ignore" })
    public void testLaunchJobWithMultipleDatasetsAndFaultyExtractor() throws Exception {
        Properties jobProps = loadJobPropsFor("testLaunchJobWithMultipleDatasetsAndFaultyExtractor");
        try {
            this.jobLauncherTestHelper.runTestWithMultipleDatasetsAndFaultyExtractor(jobProps, false);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
    }

    /**
     * Runs the helper's multiple-datasets-with-faulty-extractor scenario, passing {@code true} as the
     * helper's second argument. The test is placed in the {@code ignore} group.
     *
     * @throws Exception if the job run or the state store cleanup fails
     */
    @Test(groups = { "ignore" })
    public void testLaunchJobWithMultipleDatasetsAndFaultyExtractorAndPartialCommitPolicy() throws Exception {
        Properties jobProps =
                loadJobPropsFor("testLaunchJobWithMultipleDatasetsAndFaultyExtractorAndPartialCommitPolicy");
        try {
            this.jobLauncherTestHelper.runTestWithMultipleDatasetsAndFaultyExtractor(jobProps, true);
        } finally {
            this.jobLauncherTestHelper.deleteStateStore(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY));
        }
    }

    /**
     * Closes the test metastore database if {@link #startUp()} managed to obtain one. Always runs, even
     * when earlier configuration or test methods failed.
     *
     * @throws IOException if closing the database fails
     */
    @AfterClass(alwaysRun = true)
    public void tearDown() throws IOException {
        if (this.testMetastoreDatabase != null) {
            this.testMetastoreDatabase.close();
        }
    }

    /**
     * Builds a fresh set of job properties: the pull file from the classpath, overlaid with the launcher
     * properties, plus the list of the four test Avro source files.
     *
     * @return a new, independent {@link Properties} instance for one job run
     * @throws IOException if the pull file is missing from the classpath or cannot be read
     */
    public Properties loadJobProps() throws IOException {
        Properties jobProps = new Properties();
        loadClasspathProperties(JOB_PROPS_RESOURCE, jobProps);
        jobProps.putAll(this.launcherProps);
        jobProps.setProperty(JobLauncherTestHelper.SOURCE_FILE_LIST_KEY,
                "gobblin-test/resource/source/test.avro.0," + "gobblin-test/resource/source/test.avro.1,"
                        + "gobblin-test/resource/source/test.avro.2," + "gobblin-test/resource/source/test.avro.3");

        return jobProps;
    }

    /**
     * Loads the job properties and appends {@code "-" + testName} to the configured job name so that each
     * test works against its own job name.
     *
     * @param testName suffix appended (after a dash) to the job name
     * @return job properties carrying the test-specific job name
     * @throws IOException if the job properties cannot be loaded
     */
    private Properties loadJobPropsFor(String testName) throws IOException {
        Properties jobProps = loadJobProps();
        jobProps.setProperty(ConfigurationKeys.JOB_NAME_KEY,
                jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY) + "-" + testName);
        return jobProps;
    }

    /**
     * Loads a properties file from the classpath into {@code target}.
     *
     * @param resourceName classpath-relative name of the properties resource
     * @param target properties object that receives the loaded entries
     * @throws IOException if the resource does not exist on the classpath or cannot be read
     */
    private void loadClasspathProperties(String resourceName, Properties target) throws IOException {
        try (InputStream propsReader = getClass().getClassLoader().getResourceAsStream(resourceName)) {
            // getResourceAsStream signals a missing resource with null; fail with a message naming it
            // instead of letting Properties.load raise a bare NullPointerException.
            if (propsReader == null) {
                throw new IOException("Classpath resource not found: " + resourceName);
            }
            target.load(propsReader);
        }
    }
}