org.apache.gobblin.data.management.retention.CleanableMysqlDatasetStoreDatasetTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.gobblin.data.management.retention.CleanableMysqlDatasetStoreDatasetTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.data.management.retention;

import java.io.IOException;
import java.util.List;

import org.apache.commons.dbcp.BasicDataSource;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.typesafe.config.Config;
import com.typesafe.config.ConfigValueFactory;

import org.apache.gobblin.config.ConfigBuilder;
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.data.management.retention.dataset.CleanableDataset;
import org.apache.gobblin.data.management.retention.dataset.finder.TimeBasedDatasetStoreDatasetFinder;
import org.apache.gobblin.metastore.DatasetStateStore;
import org.apache.gobblin.metastore.DatasetStoreDataset;
import org.apache.gobblin.metastore.MysqlStateStore;
import org.apache.gobblin.metastore.testing.ITestMetastoreDatabase;
import org.apache.gobblin.metastore.testing.TestMetastoreDatabaseFactory;
import org.apache.gobblin.runtime.JobState;
import org.apache.gobblin.runtime.MysqlDatasetStateStore;
import org.apache.gobblin.runtime.TaskState;
import org.apache.gobblin.util.ClassAliasResolver;
import org.apache.gobblin.util.ConfigUtils;

/**
 * Unit test for cleaning {@link MysqlStateStore} and {@link MysqlDatasetStateStore}
 */
@Test(singleThreaded = true)
public class CleanableMysqlDatasetStoreDatasetTest {
    private static final String TEST_STATE_STORE = "TestStateStore";
    private static final String TEST_JOB_NAME1 = "TestJob1";
    private static final String TEST_JOB_NAME2 = "TestJob2";
    private static final String TEST_JOB_ID = "TestJobId";
    private static final String TEST_TASK_ID_PREFIX = "TestTask-";
    private static final String TEST_DATASET_URN1 = "TestDataset1";
    private static final String TEST_DATASET_URN2 = "TestDataset2";

    private MysqlStateStore<JobState> dbJobStateStore;
    private DatasetStateStore<JobState.DatasetState> dbDatasetStateStore;
    private long startTime = System.currentTimeMillis();
    private Config config;

    private ITestMetastoreDatabase testMetastoreDatabase;
    private static final String TEST_USER = "testUser";
    private static final String TEST_PASSWORD = "testPassword";

    private static String getJobId(String jobIdBase, int index) {
        return jobIdBase + index;
    }

    @BeforeClass
    public void setUp() throws Exception {
        this.testMetastoreDatabase = TestMetastoreDatabaseFactory.get();
        String jdbcUrl = this.testMetastoreDatabase.getJdbcUrl();
        ConfigBuilder configBuilder = ConfigBuilder.create();
        BasicDataSource mySqlDs = new BasicDataSource();

        mySqlDs.setDriverClassName(ConfigurationKeys.DEFAULT_STATE_STORE_DB_JDBC_DRIVER);
        mySqlDs.setDefaultAutoCommit(false);
        mySqlDs.setUrl(jdbcUrl);
        mySqlDs.setUsername(TEST_USER);
        mySqlDs.setPassword(TEST_PASSWORD);

        this.dbJobStateStore = new MysqlStateStore<>(mySqlDs, TEST_STATE_STORE, false, JobState.class);

        configBuilder.addPrimitive("selection.timeBased.lookbackTime", "10m");
        configBuilder.addPrimitive(ConfigurationKeys.STATE_STORE_TYPE_KEY, "mysql");
        configBuilder.addPrimitive(ConfigurationKeys.STATE_STORE_DB_TABLE_KEY, TEST_STATE_STORE);
        configBuilder.addPrimitive(ConfigurationKeys.STATE_STORE_DB_URL_KEY, jdbcUrl);
        configBuilder.addPrimitive(ConfigurationKeys.STATE_STORE_DB_USER_KEY, TEST_USER);
        configBuilder.addPrimitive(ConfigurationKeys.STATE_STORE_DB_PASSWORD_KEY, TEST_PASSWORD);

        ClassAliasResolver<DatasetStateStore.Factory> resolver = new ClassAliasResolver<>(
                DatasetStateStore.Factory.class);

        DatasetStateStore.Factory stateStoreFactory = resolver.resolveClass("mysql").newInstance();
        this.config = configBuilder.build();
        this.dbDatasetStateStore = stateStoreFactory.createStateStore(configBuilder.build());

        // clear data that may have been left behind by a prior test run
        this.dbJobStateStore.delete(TEST_JOB_NAME1);
        this.dbDatasetStateStore.delete(TEST_JOB_NAME1);
        this.dbJobStateStore.delete(TEST_JOB_NAME2);
        this.dbDatasetStateStore.delete(TEST_JOB_NAME2);
    }

    /**
     * Test cleanup of the job state store
     * @throws IOException
     */
    @Test
    public void testCleanJobStateStore() throws IOException {
        JobState jobState = new JobState(TEST_JOB_NAME1, getJobId(TEST_JOB_ID, 1));
        jobState.setId(getJobId(TEST_JOB_ID, 1));
        jobState.setProp("foo", "bar");
        jobState.setState(JobState.RunningState.COMMITTED);
        jobState.setStartTime(this.startTime);
        jobState.setEndTime(this.startTime + 1000);
        jobState.setDuration(1000);

        for (int i = 0; i < 3; i++) {
            TaskState taskState = new TaskState();
            taskState.setJobId(getJobId(TEST_JOB_ID, 1));
            taskState.setTaskId(TEST_TASK_ID_PREFIX + i);
            taskState.setId(TEST_TASK_ID_PREFIX + i);
            taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
            jobState.addTaskState(taskState);
        }

        // set old time to test that this state is deleted
        this.dbJobStateStore.setTestTimestamp(System.currentTimeMillis() / 1000 - (60 * 20));

        this.dbJobStateStore.put(TEST_JOB_NAME1, MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX, jobState);

        jobState.setJobName(TEST_JOB_NAME2);
        jobState.setJobId(getJobId(TEST_JOB_ID, 2));
        jobState.setId(getJobId(TEST_JOB_ID, 2));

        // set current time to test that the state is not deleted
        this.dbJobStateStore.setTestTimestamp(0);

        this.dbJobStateStore.put(TEST_JOB_NAME2, MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX, jobState);

        TimeBasedDatasetStoreDatasetFinder datasetFinder = new TimeBasedDatasetStoreDatasetFinder(
                FileSystem.get(new Configuration()), ConfigUtils.configToProperties(config));
        List<DatasetStoreDataset> datasets = datasetFinder.findDatasets();

        Assert.assertTrue(
                this.dbJobStateStore.exists(TEST_JOB_NAME1, MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));
        Assert.assertTrue(
                this.dbJobStateStore.exists(TEST_JOB_NAME2, MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        for (DatasetStoreDataset dataset : datasets) {
            ((CleanableDataset) dataset).clean();
        }

        Assert.assertFalse(
                this.dbJobStateStore.exists(TEST_JOB_NAME1, MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        // state with recent timestamp is not deleted
        Assert.assertTrue(
                this.dbJobStateStore.exists(TEST_JOB_NAME2, MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));
    }

    /**
     * Test cleanup of the dataset state store. This test uses the combined selection policy to test that the newest 2
     * entries are retained even when the timestamp is old.
     * 
     * @throws IOException
     */
    @Test
    public void testCleanDatasetStateStore() throws IOException {
        JobState.DatasetState datasetState = new JobState.DatasetState(TEST_JOB_NAME1, getJobId(TEST_JOB_ID, 1));

        datasetState.setDatasetUrn(TEST_DATASET_URN1);
        datasetState.setState(JobState.RunningState.COMMITTED);
        datasetState.setId(TEST_DATASET_URN1);
        datasetState.setStartTime(this.startTime);
        datasetState.setEndTime(this.startTime + 1000);
        datasetState.setDuration(1000);

        for (int i = 0; i < 3; i++) {
            TaskState taskState = new TaskState();
            taskState.setJobId(getJobId(TEST_JOB_ID, 1));
            taskState.setTaskId(TEST_TASK_ID_PREFIX + i);
            taskState.setId(TEST_TASK_ID_PREFIX + i);
            taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
            datasetState.addTaskState(taskState);
        }

        // set old time to test that this state is deleted
        ((MysqlDatasetStateStore) this.dbDatasetStateStore).setTestTimestamp(this.startTime / 1000 + 2 - (60 * 20));
        datasetState.setJobId(getJobId(TEST_JOB_ID, 1));
        this.dbDatasetStateStore.persistDatasetState(TEST_DATASET_URN1, datasetState);

        ((MysqlDatasetStateStore) this.dbDatasetStateStore).setTestTimestamp(this.startTime / 1000 + 4 - (60 * 20));
        datasetState.setJobId(getJobId(TEST_JOB_ID, 2));
        this.dbDatasetStateStore.persistDatasetState(TEST_DATASET_URN1, datasetState);

        ((MysqlDatasetStateStore) this.dbDatasetStateStore).setTestTimestamp(this.startTime / 1000 + 6 - (60 * 20));
        datasetState.setJobId(getJobId(TEST_JOB_ID, 3));
        this.dbDatasetStateStore.persistDatasetState(TEST_DATASET_URN1, datasetState);

        datasetState.setJobId(getJobId(TEST_JOB_ID, 1));

        // persist a second dataset state to test that retrieval of multiple dataset states works
        datasetState.setDatasetUrn(TEST_DATASET_URN2);
        datasetState.setId(TEST_DATASET_URN2);
        datasetState.setDuration(2000);

        // set current time to test that the state is not deleted
        ((MysqlDatasetStateStore) this.dbDatasetStateStore).setTestTimestamp(0);

        this.dbDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState);
        datasetState.setJobName(TEST_JOB_NAME2);
        this.dbDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState);

        Config cleanerConfig = config
                .withValue("gobblin.retention.selection.policy.class",
                        ConfigValueFactory
                                .fromAnyRef("org.apache.gobblin.data.management.policy.CombineSelectionPolicy"))
                .withValue("gobblin.retention.selection.combine.operation",
                        ConfigValueFactory.fromAnyRef("intersect"))
                .withValue("gobblin.retention.selection.combine.policy.classes", ConfigValueFactory.fromAnyRef(
                        "org.apache.gobblin.data.management.policy.SelectBeforeTimeBasedPolicy,org.apache.gobblin.data.management.policy.NewestKSelectionPolicy"))
                .withValue("gobblin.retention.selection.timeBased.lookbackTime",
                        ConfigValueFactory.fromAnyRef("10m"))
                .withValue("gobblin.retention.selection.newestK.versionsNotSelected",
                        ConfigValueFactory.fromAnyRef("2"));

        TimeBasedDatasetStoreDatasetFinder datasetFinder = new TimeBasedDatasetStoreDatasetFinder(
                FileSystem.get(new Configuration()), ConfigUtils.configToProperties(cleanerConfig));
        List<DatasetStoreDataset> datasets = datasetFinder.findDatasets();

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1,
                TEST_DATASET_URN1 + "-" + MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN1 + "-"
                + getJobId(TEST_JOB_ID, 1) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN1 + "-"
                + getJobId(TEST_JOB_ID, 2) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN1 + "-"
                + getJobId(TEST_JOB_ID, 3) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1,
                TEST_DATASET_URN2 + "-" + MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN2 + "-"
                + getJobId(TEST_JOB_ID, 1) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME2,
                TEST_DATASET_URN2 + "-" + MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME2, TEST_DATASET_URN2 + "-"
                + getJobId(TEST_JOB_ID, 1) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        for (DatasetStoreDataset dataset : datasets) {
            ((CleanableDataset) dataset).clean();
        }

        // the most recent two entries (current and job id 3) should be retained
        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1,
                TEST_DATASET_URN1 + "-" + MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN1 + "-"
                + getJobId(TEST_JOB_ID, 3) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertFalse(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN1 + "-"
                + getJobId(TEST_JOB_ID, 1) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertFalse(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN1 + "-"
                + getJobId(TEST_JOB_ID, 2) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1,
                TEST_DATASET_URN2 + "-" + MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME1, TEST_DATASET_URN2 + "-"
                + getJobId(TEST_JOB_ID, 1) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME2,
                TEST_DATASET_URN2 + "-" + MysqlDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX
                        + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));

        Assert.assertTrue(this.dbDatasetStateStore.exists(TEST_JOB_NAME2, TEST_DATASET_URN2 + "-"
                + getJobId(TEST_JOB_ID, 1) + MysqlDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX));
    }
}