gobblin.publisher.BaseDataPublisherTest.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.publisher.BaseDataPublisherTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.publisher;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.collect.ImmutableList;
import com.google.common.io.Files;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.metadata.MetadataMerger;
import gobblin.metadata.types.GlobalMetadata;
import gobblin.util.ForkOperatorUtils;

/**
 * Tests for the metadata-publishing behavior of {@link BaseDataPublisher}.
 */
public class BaseDataPublisherTest {
    /**
     * Verifies the simplest use of DATA_PUBLISHER_METADATA_STR: with a single branch, an arbitrary
     * metadata string set on the work unit state is expected to be the exact content of the published
     * metadata file.
     */
    @Test
    public void testMetadataStrOneBranch() throws IOException {
        final String expectedMetadata = "foobar";
        State jobState = buildDefaultState(1);

        WorkUnitState workUnit = new WorkUnitState();
        workUnit.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, expectedMetadata);
        addStateToWorkunit(jobState, workUnit);

        new BaseDataPublisher(jobState).publishMetadata(workUnit);

        // Single branch, so the metadata file carries no branch suffix.
        File metadataFile = openMetadataFile(jobState, 1, 0);
        try (InputStream in = new FileInputStream(metadataFile)) {
            String actualMetadata = IOUtils.toString(in, StandardCharsets.UTF_8);
            Assert.assertEquals(actualMetadata, expectedMetadata, "Expected to read back metadata from string");
        }
    }

    /**
     * Verifies DATA_PUBLISHER_METADATA_STR with several branches: the same string is set on every work
     * unit, and the metadata file of each branch is expected to contain exactly that string.
     */
    @Test
    public void testMetadataStrMultipleWorkUnitsAndBranches() throws IOException {
        final int branchCount = 3;
        final String expectedMetadata = "foobar";
        State jobState = buildDefaultState(branchCount);

        // One work unit is created per branch, each carrying the same metadata string.
        List<WorkUnitState> workUnitStates = new ArrayList<>();
        while (workUnitStates.size() < branchCount) {
            WorkUnitState workUnit = new WorkUnitState();
            workUnit.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, expectedMetadata);
            addStateToWorkunit(jobState, workUnit);
            workUnitStates.add(workUnit);
        }

        new BaseDataPublisher(jobState).publishMetadata(workUnitStates);

        // Every branch has its own metadata file; each must hold the configured string.
        for (int branchId = 0; branchId < branchCount; branchId++) {
            File metadataFile = openMetadataFile(jobState, branchCount, branchId);
            try (InputStream in = new FileInputStream(metadataFile)) {
                String actualMetadata = IOUtils.toString(in, StandardCharsets.UTF_8);
                Assert.assertEquals(actualMetadata, expectedMetadata, "Expected to read back metadata from string");
            }
        }
    }

    /**
     * Configures a merger class name that does not implement MetadataMerger (java.lang.String) and
     * expects an IllegalArgumentException to be raised while creating the publisher or publishing
     * metadata.
     */
    @Test(expectedExceptions = IllegalArgumentException.class)
    public void testBogusMetadataMerger() throws IOException {
        State jobState = buildDefaultState(1);

        // Enable writer-metadata publishing, but point the merger at a class of the wrong type.
        jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
        jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, "java.lang.String");
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");

        WorkUnitState workUnit = new WorkUnitState();
        addStateToWorkunit(jobState, workUnit);
        List<WorkUnitState> workUnitStates = Collections.singletonList(workUnit);

        BaseDataPublisher publisher = new BaseDataPublisher(jobState);
        publisher.publishMetadata(workUnitStates);
    }

    /**
     * Exercises metadata merging across branches. This covers two things at once:
     *  1. That a merger is called for all work units in a branch
     *  2. That a different merger class can be instantiated per branch
     */
    @Test
    public void testMergedMetadata() throws IOException {
        final int branchCount = 2;
        final int workUnitCount = 10;

        State jobState = buildDefaultState(branchCount);

        // Even-numbered branches get the addition merger, odd-numbered branches the multiplication merger.
        for (int branchId = 0; branchId < branchCount; branchId++) {
            String enabledKey = ForkOperatorUtils.getPropertyNameForBranch(
                    ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, branchCount, branchId);
            String mergerKey = ForkOperatorUtils.getPropertyNameForBranch(
                    ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, branchCount, branchId);

            String mergerClassName;
            if (branchId % 2 == 0) {
                mergerClassName = TestAdditionMerger.class.getName();
            } else {
                mergerClassName = TestMultiplicationMerger.class.getName();
            }

            jobState.setProp(enabledKey, "true");
            jobState.setProp(mergerKey, mergerClassName);
        }

        // Writer metadata for a (branch, work unit) pair is (branchId + 1) * (workUnitId + 1); the +1
        // offsets make sure we never multiply by 0.
        List<WorkUnitState> workUnitStates = new ArrayList<>();
        for (int workUnitId = 0; workUnitId < workUnitCount; workUnitId++) {
            WorkUnitState workUnit = new WorkUnitState();
            addStateToWorkunit(jobState, workUnit);

            for (int branchId = 0; branchId < branchCount; branchId++) {
                String writerMetadataKey = ForkOperatorUtils.getPropertyNameForBranch(
                        ConfigurationKeys.WRITER_METADATA_KEY, branchCount, branchId);
                int value = (branchId + 1) * (workUnitId + 1);
                workUnit.setProp(writerMetadataKey, String.valueOf(value));
            }

            workUnitStates.add(workUnit);
        }

        BaseDataPublisher publisher = new BaseDataPublisher(jobState);
        publisher.publishMetadata(workUnitStates);

        for (int branchId = 0; branchId < branchCount; branchId++) {
            boolean additive = branchId % 2 == 0;

            // Start from the identity element of the operation used on this branch.
            // NOTE: with the current constants the branch-1 product (2^10 * 10!) exceeds
            // Integer.MAX_VALUE. The expectation is computed in int arithmetic, so it wraps the same
            // way as the int accumulator in TestMultiplicationMerger.
            int expected = additive ? 0 : 1;
            for (int workUnitId = 0; workUnitId < workUnitCount; workUnitId++) {
                int contribution = (branchId + 1) * (workUnitId + 1);
                expected = additive ? expected + contribution : expected * contribution;
            }

            File metadataFile = openMetadataFile(jobState, branchCount, branchId);
            try (InputStream in = new FileInputStream(metadataFile)) {
                String actualMetadata = IOUtils.toString(in, StandardCharsets.UTF_8);
                Assert.assertEquals(actualMetadata, String.valueOf(expected),
                        "Expected to read back correctly merged metadata from string");
            }
        }
    }

    /**
     * Sets writer metadata on a work unit without configuring any merger in the job state, and expects
     * that no metadata file exists after publishing.
     */
    @Test
    public void testNoOutputWhenDisabled() throws IOException {
        State jobState = buildDefaultState(1);

        WorkUnitState workUnit = new WorkUnitState();
        addStateToWorkunit(jobState, workUnit);

        // Writer metadata is present on the work unit, but nothing in the state asks for it to be published.
        workUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, "abcdefg");

        new BaseDataPublisher(jobState).publishMetadata(Collections.singletonList(workUnit));

        boolean metadataWritten = openMetadataFile(jobState, 1, 0).exists();
        Assert.assertFalse(metadataWritten,
                "Internal metadata from writer should not be written out if no merger is set in config");
    }

    /**
     * Publishes writer metadata for two work units that carry different writer partition paths, with no
     * metadata output directory configured, and expects a "metadata.json" file to appear inside each
     * partition directory under the publisher's final dir.
     */
    @Test
    public void testWithPartitionKey() throws IOException {
        File publishPath = Files.createTempDir();
        try {
            // Fail fast if the fixture directories cannot be created; otherwise the problem would only
            // show up later as a misleading "metadata.json does not exist" assertion failure.
            File part1 = new File(publishPath, "1-2-3-4");
            Assert.assertTrue(part1.mkdir(), "Could not create partition directory " + part1);

            File part2 = new File(publishPath, "5-6-7-8");
            Assert.assertTrue(part2.mkdir(), "Could not create partition directory " + part2);

            State s = buildDefaultState(1);
            String md = new GlobalMetadata().toJson();

            // Drop the default metadata output dir so that only the final dir and the partition path
            // are configured as locations for the metadata file.
            s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
            s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
            s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
            s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());
            s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
            s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

            WorkUnitState wuState1 = new WorkUnitState();
            wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
            wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
            addStateToWorkunit(s, wuState1);

            WorkUnitState wuState2 = new WorkUnitState();
            wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "5-6-7-8");
            wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
            addStateToWorkunit(s, wuState2);

            BaseDataPublisher publisher = new BaseDataPublisher(s);
            publisher.publishMetadata(ImmutableList.of(wuState1, wuState2));

            Assert.assertTrue(new File(part1, "metadata.json").exists());
            Assert.assertTrue(new File(part2, "metadata.json").exists());
        } finally {
            // Always remove the temporary publish directory, including on assertion failure.
            FileUtils.deleteDirectory(publishPath);
        }
    }

    /**
     * Test merger that parses every metadata string as a decimal integer and reports the running sum.
     * It declares no constructor, so only the implicit no-argument constructor is available.
     */
    public static class TestAdditionMerger implements MetadataMerger<String> {
        private int sum = 0;

        /**
         * Adds the integer value of {@code metadata} to the running sum.
         *
         * @throws NumberFormatException if {@code metadata} is not a parsable integer
         */
        @Override
        public void update(String metadata) {
            // parseInt yields the primitive directly; valueOf would box an Integer only to unbox it again.
            sum += Integer.parseInt(metadata);
        }

        /** Returns the sum accumulated so far, rendered as a decimal string. */
        @Override
        public String getMergedMetadata() {
            return String.valueOf(sum);
        }
    }

    /**
     * Test merger that parses every metadata string as a decimal integer and reports the running
     * product. The product is kept in an {@code int}, so large products wrap around silently.
     */
    public static class TestMultiplicationMerger implements MetadataMerger<String> {
        private int product = 1;

        /**
         * Accepts a {@link Properties} argument that is ignored; the constructor exists only so that
         * instantiation through a Properties-taking constructor is exercised.
         */
        public TestMultiplicationMerger(Properties config) {
            // testing ctor call
        }

        /**
         * Multiplies the running product by the integer value of {@code metadata}.
         *
         * @throws NumberFormatException if {@code metadata} is not a parsable integer
         */
        @Override
        public void update(String metadata) {
            // parseInt yields the primitive directly; valueOf would box an Integer only to unbox it again.
            product *= Integer.parseInt(metadata);
        }

        /** Returns the product accumulated so far, rendered as a decimal string. */
        @Override
        public String getMergedMetadata() {
            return String.valueOf(product);
        }
    }

    /**
     * Copies every property currently held by {@code s} onto {@code wuState} via {@code setProp}.
     * Property keys are assumed to be strings.
     */
    private void addStateToWorkunit(State s, WorkUnitState wuState) {
        Map<Object, Object> sourceProps = s.getProperties();
        for (Object key : sourceProps.keySet()) {
            wuState.setProp((String) key, sourceProps.get(key));
        }
    }

    /**
     * Builds the expected location of a branch's metadata file from the output dir and file name
     * configured in {@code state}. When there is more than one branch, ".&lt;branchId&gt;" is appended
     * to the file name. Despite the method name nothing is opened; only a {@link File} is returned.
     */
    private File openMetadataFile(State state, int numBranches, int branchId) {
        String outputDir = state.getProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
        String baseName = state.getProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE);
        String fileName = (numBranches > 1) ? baseName + "." + branchId : baseName;
        return new File(outputDir, fileName);
    }

    /**
     * Builds the baseline state shared by the tests: the given number of fork branches plus a metadata
     * output dir and file name pointing at a unique, currently non-existent path in the temp directory.
     *
     * @param numBranches value stored under FORK_BRANCHES_KEY
     * @return a new state holding the three properties described above
     * @throws IOException if the placeholder temp file cannot be created or cannot be removed again
     */
    private State buildDefaultState(int numBranches) throws IOException {
        State state = new State();
        state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, numBranches);

        // createTempFile is used only to reserve a unique name. The placeholder must be gone before the
        // tests run, because some of them assert that no metadata file exists at this location.
        File tmpLocation = File.createTempFile("metadata", "");
        if (!tmpLocation.delete()) {
            throw new IOException("Could not delete temporary placeholder file " + tmpLocation);
        }

        state.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR, tmpLocation.getParent());
        state.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, tmpLocation.getName());

        return state;
    }
}