org.apache.gobblin.data.management.copy.replication.ConfigBasedDatasetTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.gobblin.data.management.copy.replication.ConfigBasedDatasetTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.data.management.copy.replication;

import java.net.URI;
import java.util.Collection;
import java.util.Properties;
import java.util.Set;

import lombok.extern.slf4j.Slf4j;
import org.apache.gobblin.data.management.dataset.DatasetUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.base.Optional;
import com.google.common.collect.Sets;

import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.data.management.copy.CopyConfiguration;
import org.apache.gobblin.data.management.copy.CopyEntity;
import org.apache.gobblin.data.management.copy.CopyableFile;
import org.apache.gobblin.data.management.copy.PreserveAttributes;
import org.apache.gobblin.data.management.copy.entities.PostPublishStep;
import org.apache.gobblin.data.management.copy.entities.PrePublishStep;
import org.apache.gobblin.source.extractor.ComparableWatermark;
import org.apache.gobblin.source.extractor.extract.LongWatermark;
import org.apache.gobblin.util.FileListUtils;
import org.apache.gobblin.util.PathUtils;
import org.apache.gobblin.util.commit.DeleteFileCommitStep;
import org.apache.gobblin.util.filesystem.DataFileVersionStrategy;

/**
 * Unit test for {@link ConfigBasedDataset}.
 *
 * <p>The test wires a {@link ConfigBasedDataset} to mocked replication end points that list files
 * from the local file system, and then inspects the {@link CopyEntity}s the dataset produces.
 *
 * @author mitu
 *
 */
@Test(groups = { "gobblin.data.management.copy.replication" })
@Slf4j
public class ConfigBasedDatasetTest {

    /**
     * Builds a {@link ConfigBasedDataset} over mocked source/destination end points and returns the
     * entities reported by {@link ConfigBasedDataset#getCopyableFiles}.
     *
     * <p>NOTE(review): this helper is public inside a class annotated with a class-level
     * {@code @Test}; presumably TestNG skips it because it returns a value - confirm before
     * changing the signature or return type.
     *
     * @param sourceDir dataset path reported by the mocked "copy from" end point; its files are
     *        listed recursively from the local file system
     * @param destinationDir dataset path reported by the mocked "copy to" end point; also used as
     *        the publish directory of the {@link CopyConfiguration}
     * @param sourceWatermark value wrapped in a {@link LongWatermark} and reported as the source
     *        watermark (the destination reports no watermark)
     * @param isFilterEnabled when {@code true}, configures
     *        {@code org.apache.gobblin.util.filters.HiddenFilter} as the path filter and applies
     *        it to directories as well
     * @return the copy entities computed by the dataset
     * @throws Exception if the file listing or the dataset computation fails
     */
    public Collection<? extends CopyEntity> testGetCopyableFilesHelper(String sourceDir, String destinationDir,
            long sourceWatermark, boolean isFilterEnabled) throws Exception {
        FileSystem localFs = FileSystem.getLocal(new Configuration());
        URI local = localFs.getUri();

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
        if (isFilterEnabled) {
            properties.setProperty(DatasetUtils.CONFIGURATION_KEY_PREFIX + "path.filter.class",
                    "org.apache.gobblin.util.filters.HiddenFilter");
            properties.setProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "true");
        }

        // Instantiate the filter once, after the properties are final. When the filter is disabled
        // the directory flag is absent from the properties and therefore parses to false.
        PathFilter pathFilter = DatasetUtils.instantiatePathFilter(properties);
        boolean applyFilterToDirectories = Boolean
                .parseBoolean(properties.getProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "false"));

        CopyConfiguration copyConfiguration = CopyConfiguration.builder(localFs, properties)
                .publishDir(new Path(destinationDir))
                .preserve(PreserveAttributes.fromMnemonicString("ugp")).build();

        ReplicationMetaData mockMetaData = Mockito.mock(ReplicationMetaData.class);
        Mockito.when(mockMetaData.toString()).thenReturn("Mock Meta Data");

        ReplicationConfiguration mockRC = Mockito.mock(ReplicationConfiguration.class);
        Mockito.when(mockRC.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
        Mockito.when(mockRC.getMetaData()).thenReturn(mockMetaData);
        Mockito.when(mockRC.getVersionStrategyFromConfigStore())
                .thenReturn(Optional.of(DataFileVersionStrategy.DEFAULT_DATA_FILE_VERSION_STRATEGY));
        Mockito.when(mockRC.getEnforceFileSizeMatchFromConfigStore()).thenReturn(Optional.absent());

        // Source end point: reports the given watermark and the files found under sourceDir.
        HadoopFsEndPoint copyFrom = Mockito.mock(HadoopFsEndPoint.class);
        Mockito.when(copyFrom.getDatasetPath()).thenReturn(new Path(sourceDir));
        Mockito.when(copyFrom.getFsURI()).thenReturn(local);
        ComparableWatermark sw = new LongWatermark(sourceWatermark);
        Mockito.when(copyFrom.getWatermark()).thenReturn(Optional.of(sw));
        Mockito.when(copyFrom.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs,
                new Path(sourceDir), pathFilter, applyFilterToDirectories));

        // Destination end point: reports no watermark and the files found under destinationDir.
        HadoopFsEndPoint copyTo = Mockito.mock(HadoopFsEndPoint.class);
        Mockito.when(copyTo.getDatasetPath()).thenReturn(new Path(destinationDir));
        Mockito.when(copyTo.getFsURI()).thenReturn(local);
        Optional<ComparableWatermark> absentWatermark = Optional.absent();
        Mockito.when(copyTo.getWatermark()).thenReturn(absentWatermark);
        Mockito.when(copyTo.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs,
                new Path(destinationDir), pathFilter, applyFilterToDirectories));

        CopyRoute route = Mockito.mock(CopyRoute.class);
        Mockito.when(route.getCopyFrom()).thenReturn(copyFrom);
        Mockito.when(route.getCopyTo()).thenReturn(copyTo);

        ConfigBasedDataset dataset = new ConfigBasedDataset(mockRC, properties, route);
        return dataset.getCopyableFiles(localFs, copyConfiguration);
    }

    /**
     * Runs the helper against the {@code configBasedDatasetTest/src} and
     * {@code configBasedDatasetTest/dest} classpath resources, first without and then with the
     * path filter, and verifies the entities produced by the filtered run.
     *
     * <p>Expects 8 entities without the filter and 6 with it. For the filtered run every
     * {@link CopyableFile} must map to the same relative path on both sides, the pre-publish step
     * must be a {@link DeleteFileCommitStep} mentioning {@code dest/dir1/file1}, and the
     * post-publish step must be a {@link WatermarkMetadataGenerationCommitStep} mentioning the
     * metadata file and the source watermark.
     *
     * @throws Exception if the helper fails
     */
    @Test
    public void testGetCopyableFiles() throws Exception {
        String sourceDir = getClass().getClassLoader().getResource("configBasedDatasetTest/src").getFile();
        String destinationDir = getClass().getClassLoader().getResource("configBasedDatasetTest/dest").getFile();
        long sourceWatermark = 100L;

        Collection<? extends CopyEntity> copyableFiles = testGetCopyableFilesHelper(sourceDir, destinationDir,
                sourceWatermark, false);
        Assert.assertEquals(copyableFiles.size(), 8);
        copyableFiles = testGetCopyableFilesHelper(sourceDir, destinationDir, sourceWatermark, true);
        Assert.assertEquals(copyableFiles.size(), 6);

        Set<Path> paths = Sets.newHashSet(new Path("dir1/file2"), new Path("dir1/file1"), new Path("dir2/file1"),
                new Path("dir2/file3"));
        for (CopyEntity copyEntity : copyableFiles) {
            if (copyEntity instanceof CopyableFile) {
                CopyableFile file = (CopyableFile) copyEntity;
                Path originRelativePath = PathUtils.relativizePath(
                        PathUtils.getPathWithoutSchemeAndAuthority(file.getOrigin().getPath()),
                        PathUtils.getPathWithoutSchemeAndAuthority(new Path(sourceDir)));
                Path targetRelativePath = PathUtils.relativizePath(
                        PathUtils.getPathWithoutSchemeAndAuthority(file.getDestination()),
                        PathUtils.getPathWithoutSchemeAndAuthority(new Path(destinationDir)));

                Assert.assertTrue(paths.contains(originRelativePath));
                Assert.assertTrue(paths.contains(targetRelativePath));
                Assert.assertEquals(originRelativePath, targetRelativePath);
            } else if (copyEntity instanceof PrePublishStep) {
                PrePublishStep pre = (PrePublishStep) copyEntity;
                Assert.assertTrue(pre.getStep() instanceof DeleteFileCommitStep);
                // need to delete this file
                String explanation = pre.explain();
                // contains() also accepts a match at index 0, which indexOf(..) > 0 rejected.
                Assert.assertTrue(explanation.contains("configBasedDatasetTest/dest/dir1/file1"), explanation);
            } else if (copyEntity instanceof PostPublishStep) {
                PostPublishStep post = (PostPublishStep) copyEntity;
                Assert.assertTrue(post.getStep() instanceof WatermarkMetadataGenerationCommitStep);
                String explanation = post.explain();
                // Checked separately so a failure shows which expectation was not met.
                Assert.assertTrue(explanation.contains("dest/_metadata"), explanation);
                Assert.assertTrue(explanation.contains(String.valueOf(sourceWatermark)), explanation);
            } else {
                Assert.fail("Wrong type: " + (copyEntity == null ? "null" : copyEntity.getClass().getName()));
            }
        }
    }
}