co.cask.hydrator.plugin.ValueMapperTest.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.hydrator.plugin.ValueMapperTest.java

Source

/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin;

import co.cask.cdap.api.artifact.ArtifactVersion;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.etl.api.Transform;
import co.cask.cdap.etl.batch.ETLBatchApplication;
import co.cask.cdap.etl.batch.mapreduce.ETLMapReduce;
import co.cask.cdap.etl.mock.batch.MockSink;
import co.cask.cdap.etl.mock.batch.MockSource;
import co.cask.cdap.etl.mock.test.HydratorTestBase;
import co.cask.cdap.etl.proto.v2.ETLBatchConfig;
import co.cask.cdap.etl.proto.v2.ETLPlugin;
import co.cask.cdap.etl.proto.v2.ETLStage;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.artifact.AppRequest;
import co.cask.cdap.proto.artifact.ArtifactSummary;
import co.cask.cdap.proto.id.ArtifactId;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.test.ApplicationManager;
import co.cask.cdap.test.DataSetManager;
import co.cask.cdap.test.MapReduceManager;
import co.cask.cdap.test.TestConfiguration;
import co.cask.hydrator.common.MockPipelineConfigurer;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.csv.CSVFormat;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Test case for {@link ValueMapper}.
 *
 * <p>The first three tests deploy a {@code source -> ValueMapper -> sink} batch pipeline built from the mock
 * source/sink plugins, run it as a MapReduce job and inspect the records written to the sink. The remaining
 * tests call {@link ValueMapper#configurePipeline} directly against a {@link MockPipelineConfigurer}.
 */
public class ValueMapperTest extends HydratorTestBase {

    // Field names. Declared before SOURCE_SCHEMA so the schema can refer to them by simple name.
    private static final String ID = "id";
    private static final String NAME = "name";
    private static final String SALARY = "salary";
    private static final String DESIGNATIONID = "designationid";
    private static final String DESIGNATIONNAME = "designationName";
    private static final String SALARYDESC = "salaryDesc";

    // Schema of the records fed into the pipeline; only the designation id is nullable.
    private static final Schema SOURCE_SCHEMA = Schema.recordOf("sourceRecord",
            Schema.Field.of(ID, Schema.of(Schema.Type.STRING)),
            Schema.Field.of(NAME, Schema.of(Schema.Type.STRING)),
            Schema.Field.of(SALARY, Schema.of(Schema.Type.STRING)),
            Schema.Field.of(DESIGNATIONID, Schema.nullableOf(Schema.of(Schema.Type.STRING))));

    @ClassRule
    public static final TestConfiguration CONFIG = new TestConfiguration();

    private static final ArtifactVersion CURRENT_VERSION = new ArtifactVersion("3.2.0");

    private static final ArtifactId BATCH_APP_ARTIFACT_ID = NamespaceId.DEFAULT.artifact("etlbatch",
            CURRENT_VERSION.getVersion());
    private static final ArtifactSummary ETLBATCH_ARTIFACT = new ArtifactSummary(
            BATCH_APP_ARTIFACT_ID.getArtifact(), BATCH_APP_ARTIFACT_ID.getVersion());

    /**
     * Registers the ETL batch application artifact and the plugin artifact under test once for the class.
     */
    @BeforeClass
    public static void setupTestClass() throws Exception {
        // Add the ETL batch artifact and mock plugins.
        setupBatchArtifacts(BATCH_APP_ARTIFACT_ID, ETLBatchApplication.class);

        // Add our plugins artifact with the ETL batch artifact as its parent.
        // This will make our plugins available to the ETL batch.
        addPluginArtifact(NamespaceId.DEFAULT.artifact("transform-plugins", "1.0.0"), BATCH_APP_ARTIFACT_ID,
                ValueMapper.class, CSVFormat.class, Base64.class);
    }

    /**
     * Runs records with a null, an empty, a mapped and an unmapped designation id through a pipeline that
     * configures the default {@code DEFAULTID}; only the id present in the lookup table is expected to be
     * translated, every other record is expected to carry the default.
     */
    @Test
    public void testEmptyAndNull() throws Exception {
        String inputTable = "input_table_test_Empty_Null";
        String sinkTable = "output_table_test_Empty_Null";

        Map<String, String> transformProperties = ImmutableMap.of(
                "mapping", "designationid:designation_lookup_table_test_Empty_Null:designationName",
                "defaults", "designationid:DEFAULTID");

        ApplicationManager appManager = deployPipeline("valuemappertest_test_Empty_Null", inputTable, sinkTable,
                transformProperties);

        createLookupTable("designation_lookup_table_test_Empty_Null",
                ImmutableMap.of("1", "SE", "2", "SSE", "3", "ML"));

        List<StructuredRecord> input = ImmutableList.of(
                employee("100", "John", "1000", null),
                employee("101", "Kerry", "1030", "2"),
                employee("102", "Mathew", "1230", ""),
                employee("103", "Allie", "2000", "4"));

        List<StructuredRecord> outputRecords = runPipeline(appManager, inputTable, sinkTable, input);

        Map<String, String> nameDesignationMap = new HashMap<>();
        nameDesignationMap.put("John", "DEFAULTID");
        nameDesignationMap.put("Kerry", "SSE");
        nameDesignationMap.put("Mathew", "DEFAULTID");
        nameDesignationMap.put("Allie", "DEFAULTID");

        Assert.assertEquals(4, outputRecords.size());
        assertFieldByName(nameDesignationMap, outputRecords, DESIGNATIONNAME);
    }

    /**
     * Runs the same kind of input through a pipeline whose {@code defaults} property is empty; null and empty
     * designation ids are expected to come out unchanged, and the unmapped salary field must be untouched.
     */
    @Test
    public void testWithNoDefaults() throws Exception {
        String inputTable = "input_table_without_defaults";
        String sinkTable = "output_table_without_defaults";

        Map<String, String> transformProperties = ImmutableMap.of(
                "mapping", "designationid:designation_lookup_table_without_defaults:designationName",
                "defaults", "");

        ApplicationManager appManager = deployPipeline("valuemappertest_without_defaults", inputTable, sinkTable,
                transformProperties);

        createLookupTable("designation_lookup_table_without_defaults",
                ImmutableMap.of("1", "SE", "2", "SSE", "3", "ML", "4", "TL"));

        List<StructuredRecord> input = ImmutableList.of(
                employee("100", "John", "1000", null),
                employee("101", "Kerry", "1030", "2"),
                employee("102", "Mathew", "1230", ""),
                employee("103", "Allie", "2000", "4"));

        List<StructuredRecord> outputRecords = runPipeline(appManager, inputTable, sinkTable, input);

        // HashMap rather than ImmutableMap because a null designation is an expected value here.
        Map<String, String> nameDesignationMap = new HashMap<>();
        nameDesignationMap.put("John", null);
        nameDesignationMap.put("Kerry", "SSE");
        nameDesignationMap.put("Mathew", "");
        nameDesignationMap.put("Allie", "TL");

        Map<String, String> nameSalaryMap = new HashMap<>();
        nameSalaryMap.put("John", "1000");
        nameSalaryMap.put("Kerry", "1030");
        nameSalaryMap.put("Mathew", "1230");
        nameSalaryMap.put("Allie", "2000");

        Assert.assertEquals(4, outputRecords.size());
        assertFieldByName(nameDesignationMap, outputRecords, DESIGNATIONNAME);
        assertFieldByName(nameSalaryMap, outputRecords, SALARY);
    }

    /**
     * Configures two mappings (designation id and salary), each backed by its own lookup table, and checks
     * that both target fields are populated from the respective table.
     */
    @Test
    public void testWithMultipleMapping() throws Exception {
        String inputTable = "input_table_with_multi_mapping";
        String sinkTable = "output_table_with_multi_mapping";

        Map<String, String> transformProperties = ImmutableMap.of(
                "mapping",
                "designationid:designation_lookup_table_with_multi_mapping:designationName,"
                        + "salary:salary_lookup_table:salaryDesc",
                "defaults", "designationid:DefaultID");

        ApplicationManager appManager = deployPipeline("valuemappertest_with_multi_mapping", inputTable,
                sinkTable, transformProperties);

        createLookupTable("designation_lookup_table_with_multi_mapping",
                ImmutableMap.of("1", "SE", "2", "SSE", "3", "ML"));
        createLookupTable("salary_lookup_table",
                ImmutableMap.of("1000", "Low", "2000", "Medium", "5000", "High"));

        List<StructuredRecord> input = ImmutableList.of(
                employee("100", "John", "1000", "1"),
                employee("101", "Kerry", "2000", "2"),
                employee("102", "Mathew", "5000", "3"));

        List<StructuredRecord> outputRecords = runPipeline(appManager, inputTable, sinkTable, input);

        Map<String, String> nameDesignationMap = new HashMap<>();
        nameDesignationMap.put("John", "SE");
        nameDesignationMap.put("Kerry", "SSE");
        nameDesignationMap.put("Mathew", "ML");

        Map<String, String> nameSalaryMap = new HashMap<>();
        nameSalaryMap.put("John", "Low");
        nameSalaryMap.put("Kerry", "Medium");
        nameSalaryMap.put("Mathew", "High");

        Assert.assertEquals(3, outputRecords.size());
        assertFieldByName(nameDesignationMap, outputRecords, DESIGNATIONNAME);
        assertFieldByName(nameSalaryMap, outputRecords, SALARYDESC);
    }

    /**
     * Expects {@link IllegalArgumentException} from {@code configurePipeline} when the mapped source field
     * ({@code designationid}) is declared as INT in the input schema instead of STRING.
     */
    @Test(expected = IllegalArgumentException.class)
    public void testStringHandling() throws Exception {
        Schema inputSchema = inputSchemaWithDesignationId(Schema.of(Schema.Type.INT));

        ValueMapper.Config config = new ValueMapper.Config("designationid:designation_lookup_table:designationName",
                "designationid:DEFAULTID");

        MockPipelineConfigurer configurer = new MockPipelineConfigurer(inputSchema);
        new ValueMapper(config).configurePipeline(configurer);
    }

    /**
     * Checks the output schema produced by {@code configurePipeline}: the record is expected to be named
     * {@code sourceRecord.formatted} and to contain {@code designationName} as a non-nullable string in place
     * of the nullable {@code designationid} field.
     */
    @Test
    public void testSchemaHandling() throws Exception {
        Schema inputSchema = inputSchemaWithDesignationId(Schema.nullableOf(Schema.of(Schema.Type.STRING)));

        ValueMapper.Config config = new ValueMapper.Config("designationid:designation_lookup_table:designationName",
                "designationid:DEFAULTID");

        MockPipelineConfigurer configurer = new MockPipelineConfigurer(inputSchema);
        new ValueMapper(config).configurePipeline(configurer);
        Schema outputSchema = configurer.getOutputSchema();

        Schema expectedOutputSchema = Schema.recordOf("sourceRecord.formatted",
                Schema.Field.of(ID, Schema.of(Schema.Type.STRING)),
                Schema.Field.of(NAME, Schema.of(Schema.Type.STRING)),
                Schema.Field.of(SALARY, Schema.of(Schema.Type.STRING)),
                Schema.Field.of(DESIGNATIONNAME, Schema.of(Schema.Type.STRING)));

        Assert.assertEquals(expectedOutputSchema, outputSchema);
    }

    /**
     * Expects {@link IllegalArgumentException} from {@code configurePipeline} when a mapping entry has only
     * two colon-separated parts ({@code designationid:designation_lookup_table}) instead of three.
     */
    @Test(expected = IllegalArgumentException.class)
    public void testMappingValidation() throws Exception {
        Schema inputSchema = inputSchemaWithDesignationId(Schema.of(Schema.Type.STRING));

        ValueMapper.Config config = new ValueMapper.Config("designationid:designation_lookup_table",
                "designationid:DEFAULTID");

        MockPipelineConfigurer configurer = new MockPipelineConfigurer(inputSchema);
        new ValueMapper(config).configurePipeline(configurer);
    }

    /**
     * Builds the {@code sourceRecord} input schema with the given schema for the {@code designationid} field.
     */
    private static Schema inputSchemaWithDesignationId(Schema designationIdSchema) {
        return Schema.recordOf("sourceRecord",
                Schema.Field.of(ID, Schema.of(Schema.Type.STRING)),
                Schema.Field.of(NAME, Schema.of(Schema.Type.STRING)),
                Schema.Field.of(SALARY, Schema.of(Schema.Type.STRING)),
                Schema.Field.of(DESIGNATIONID, designationIdSchema));
    }

    /**
     * Builds one input record conforming to {@link #SOURCE_SCHEMA}; {@code designationId} may be null.
     */
    private static StructuredRecord employee(String id, String name, String salary, String designationId) {
        return StructuredRecord.builder(SOURCE_SCHEMA)
                .set(ID, id)
                .set(NAME, name)
                .set(SALARY, salary)
                .set(DESIGNATIONID, designationId)
                .build();
    }

    /**
     * Deploys a {@code source -> transform -> sink} batch application whose transform stage is the
     * {@code ValueMapper} plugin configured with the given properties.
     *
     * @param appName             name of the application to deploy in the default namespace
     * @param inputTable          dataset name backing the mock source
     * @param sinkTable           dataset name backing the mock sink
     * @param transformProperties plugin properties passed to the ValueMapper stage
     * @return the manager of the deployed application
     */
    private ApplicationManager deployPipeline(String appName, String inputTable, String sinkTable,
            Map<String, String> transformProperties) throws Exception {
        ETLStage source = new ETLStage("source", MockSource.getPlugin(inputTable));
        ETLStage transform = new ETLStage("transform",
                new ETLPlugin("ValueMapper", Transform.PLUGIN_TYPE, transformProperties, null));
        ETLStage sink = new ETLStage("sink", MockSink.getPlugin(sinkTable));

        ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
                .addStage(source)
                .addStage(transform)
                .addStage(sink)
                .addConnection(source.getName(), transform.getName())
                .addConnection(transform.getName(), sink.getName())
                .build();

        AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
        Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, appName);
        return deployApplication(appId, appRequest);
    }

    /**
     * Creates a {@link KeyValueTable} dataset with the given name and writes every entry to it as UTF-8 bytes.
     */
    private void createLookupTable(String tableName, Map<String, String> entries) throws Exception {
        addDatasetInstance(KeyValueTable.class.getName(), tableName);
        DataSetManager<KeyValueTable> lookupManager = getDataset(tableName);
        KeyValueTable lookupTable = lookupManager.get();
        for (Map.Entry<String, String> entry : entries.entrySet()) {
            lookupTable.write(entry.getKey().getBytes(Charsets.UTF_8), entry.getValue().getBytes(Charsets.UTF_8));
        }
        lookupManager.flush();
    }

    /**
     * Writes {@code input} to the mock source, starts the pipeline's MapReduce program, waits up to five
     * minutes for it to finish and returns whatever the mock sink received.
     */
    private List<StructuredRecord> runPipeline(ApplicationManager appManager, String inputTable, String sinkTable,
            List<StructuredRecord> input) throws Exception {
        DataSetManager<Table> inputManager = getDataset(inputTable);
        MockSource.writeInput(inputManager, input);

        MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
        mrManager.start();
        mrManager.waitForFinish(5, TimeUnit.MINUTES);

        DataSetManager<Table> outputManager = getDataset(sinkTable);
        return MockSink.readOutput(outputManager);
    }

    /**
     * Asserts that the output records, keyed by their {@code name} field, carry exactly the expected values
     * in {@code fieldName}.
     *
     * <p>Output order is not relied upon. Comparing whole maps (and rejecting duplicate names) means a record
     * with an unexpected name, or the same record emitted twice, fails the test instead of being matched
     * against a missing expectation.
     */
    private static void assertFieldByName(Map<String, String> expectedByName, List<StructuredRecord> records,
            String fieldName) {
        Map<String, Object> actualByName = new HashMap<>();
        for (StructuredRecord record : records) {
            String name = record.get(NAME);
            Assert.assertFalse("Duplicate output record for name: " + name, actualByName.containsKey(name));
            actualByName.put(name, record.get(fieldName));
        }
        Assert.assertEquals(expectedByName, actualByName);
    }

}