co.cask.hydrator.plugin.CopybookTest.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.hydrator.plugin.CopybookTest.java

Source

/*
 * Copyright  2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin;

import co.cask.cdap.api.artifact.ArtifactVersion;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.etl.api.batch.BatchSource;
import co.cask.cdap.etl.batch.ETLBatchApplication;
import co.cask.cdap.etl.batch.mapreduce.ETLMapReduce;
import co.cask.cdap.etl.mock.batch.MockSink;
import co.cask.cdap.etl.mock.test.HydratorTestBase;
import co.cask.cdap.etl.proto.v2.ETLBatchConfig;
import co.cask.cdap.etl.proto.v2.ETLPlugin;
import co.cask.cdap.etl.proto.v2.ETLStage;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.artifact.AppRequest;
import co.cask.cdap.proto.artifact.ArtifactSummary;
import co.cask.cdap.proto.id.ArtifactId;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.test.ApplicationManager;
import co.cask.cdap.test.DataSetManager;
import co.cask.cdap.test.MapReduceManager;
import co.cask.cdap.test.TestConfiguration;
import co.cask.hydrator.common.Constants;
import co.cask.hydrator.plugin.batch.CopybookIOUtils;
import co.cask.hydrator.plugin.batch.CopybookSource;
import com.google.common.collect.ImmutableMap;
import net.sf.JRecord.Common.RecordException;
import net.sf.JRecord.Details.AbstractLine;
import net.sf.JRecord.Details.LayoutDetail;
import net.sf.JRecord.Details.Line;
import net.sf.JRecord.External.ExternalRecord;
import net.sf.JRecord.External.ToLayoutDetail;
import net.sf.JRecord.IO.AbstractLineWriter;
import net.sf.JRecord.IO.LineIOProvider;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Unit test for {@link CopybookSource} classes.
 */
/**
 * Unit tests for the {@link CopybookSource} batch source plugin.
 *
 * <p>Each test deploys a small ETL pipeline (CopybookReader source -&gt; mock sink) against the
 * in-memory CDAP test framework, runs it, and verifies the records read from a fixed-length
 * binary file described by a COBOL copybook.</p>
 */
public class CopybookTest extends HydratorTestBase {

    @ClassRule
    public static final TestConfiguration CONFIG = new TestConfiguration("explore.enabled", false);

    private static final ArtifactVersion CURRENT_VERSION = new ArtifactVersion("3.4.0-SNAPSHOT");
    private static final ArtifactId BATCH_APP_ARTIFACT_ID = NamespaceId.DEFAULT.artifact("etlbatch",
            CURRENT_VERSION.getVersion());
    private static final ArtifactSummary ETLBATCH_ARTIFACT = new ArtifactSummary(
            BATCH_APP_ARTIFACT_ID.getArtifact(), BATCH_APP_ARTIFACT_ID.getVersion());
    @ClassRule
    public static TemporaryFolder temporaryFolder = new TemporaryFolder();

    // COBOL copybook for the fixed-length (27 byte) DTAR020 sales record; matches the
    // pre-generated test data file src/test/resources/DTAR020_FB.bin.
    private final String cblContents = "000100*                                                                         \n"
            + "000200*   DTAR020 IS THE OUTPUT FROM DTAB020 FROM THE IML                       \n"
            + "000300*   CENTRAL REPORTING SYSTEM                                              \n"
            + "000400*                                                                         \n"
            + "000500*   CREATED BY BRUCE ARTHUR  19/12/90                                     \n"
            + "000600*                                                                         \n"
            + "000700*   RECORD LENGTH IS 27.                                                  \n"
            + "000800*                                                                         \n"
            + "000900        03  DTAR020-KCODE-STORE-KEY.                                      \n"
            + "001000            05 DTAR020-KEYCODE-NO      PIC X(08).                         \n"
            + "001100            05 DTAR020-STORE-NO        PIC S9(03)   COMP-3.               \n"
            + "001200        03  DTAR020-DATE               PIC S9(07)   COMP-3.               \n"
            + "001300        03  DTAR020-DEPT-NO            PIC S9(03)   COMP-3.               \n"
            + "001400        03  DTAR020-QTY-SOLD           PIC S9(9)    COMP-3.               \n"
            + "001500        03  DTAR020-SALE-PRICE         PIC S9(9)V99 COMP-3.";

    @BeforeClass
    public static void setupTest() throws Exception {
        setupBatchArtifacts(BATCH_APP_ARTIFACT_ID, ETLBatchApplication.class);
        // add artifact for batch sources and sinks
        addPluginArtifact(NamespaceId.DEFAULT.artifact("copybookreader-plugins", "1.0.0"), BATCH_APP_ARTIFACT_ID,
                CopybookSource.class);
        FileInputFormat.setInputPaths(new JobConf(), new Path("src/test/resources"));
    }

    @AfterClass
    public static void tearDown() throws Exception {
        temporaryFolder.delete();
    }

    /**
     * Deploys a pipeline with a CopybookReader source configured from {@code sourceProperties} and a
     * mock sink writing to {@code outputDatasetName}, runs the MapReduce to completion, and returns
     * the records captured by the sink.
     *
     * @param sourceProperties  plugin properties for the CopybookReader source
     * @param outputDatasetName mock-sink output dataset; must be unique per test to avoid
     *                          cross-test record pollution
     * @param appName           application id; must be unique per test to avoid redeploy collisions
     * @param timeoutMinutes    maximum time to wait for the MapReduce run to finish
     */
    private List<StructuredRecord> deployAndRun(Map<String, String> sourceProperties, String outputDatasetName,
                                                String appName, long timeoutMinutes) throws Exception {
        ETLStage source = new ETLStage("CopybookReader",
                new ETLPlugin("CopybookReader", BatchSource.PLUGIN_TYPE, sourceProperties, null));
        ETLStage sink = new ETLStage("sink", MockSink.getPlugin(outputDatasetName));

        ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(source).addStage(sink)
                .addConnection(source.getName(), sink.getName()).build();

        AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
        Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, appName);
        ApplicationManager appManager = deployApplication(appId, appRequest);

        MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
        mrManager.start();
        mrManager.waitForFinish(timeoutMinutes, TimeUnit.MINUTES);

        DataSetManager<Table> outputManager = getDataset(outputDatasetName);
        return MockSink.readOutput(outputManager);
    }

    /**
     * Asserts that exactly the two expected DTAR020 sale records (keycode -> sale price) were read
     * and that the output records carry {@code expectedSchema}.
     */
    private void verifySaleRecords(List<StructuredRecord> output, Schema expectedSchema) {
        Assert.assertEquals("Expected records", 2, output.size());

        Map<String, Double> result = new HashMap<>();
        for (StructuredRecord record : output) {
            result.put((String) record.get("DTAR020-KEYCODE-NO"), (Double) record.get("DTAR020-SALE-PRICE"));
        }

        Assert.assertEquals(4.87, result.get("63604808").doubleValue(), 0.1);
        Assert.assertEquals(5.01, result.get("69694158").doubleValue(), 0.1);
        // JUnit convention: expected value first, actual second.
        Assert.assertEquals("Expected schema", expectedSchema, output.get(0).getSchema());
    }

    @Test
    public void testCopybookReaderWithRequiredFields() throws Exception {
        // Expected schema once DTAR020-STORE-NO is removed via the 'drop' property.
        Schema schema = Schema.recordOf("record",
                Schema.Field.of("DTAR020-KEYCODE-NO", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
                Schema.Field.of("DTAR020-DATE", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-DEPT-NO", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-QTY-SOLD", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-SALE-PRICE", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))));

        Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
                .put(Constants.Reference.REFERENCE_NAME, "TestCase")
                .put("binaryFilePath", "src/test/resources/DTAR020_FB.bin")
                .put("copybookContents", cblContents)
                .put("drop", "DTAR020-STORE-NO")
                .build();

        List<StructuredRecord> output = deployAndRun(sourceProperties, "output-batchsourcetest",
                "CopybookReaderRequiredFieldsTest", 5);
        verifySaleRecords(output, schema);
    }

    @Test
    public void testCopybookReaderWithAllFields() throws Exception {
        // No 'drop' property: every copybook field is expected in the output schema.
        Schema schema = Schema.recordOf("record",
                Schema.Field.of("DTAR020-KEYCODE-NO", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
                Schema.Field.of("DTAR020-STORE-NO", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-DATE", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-DEPT-NO", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-QTY-SOLD", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-SALE-PRICE", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))));

        // Tiny maxSplitSize forces the source to handle multiple input splits.
        Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
                .put(Constants.Reference.REFERENCE_NAME, "TestCase")
                .put("binaryFilePath", "src/test/resources/DTAR020_FB.bin")
                .put("copybookContents", cblContents)
                .put("maxSplitSize", "5")
                .build();

        List<StructuredRecord> output = deployAndRun(sourceProperties, "output-batchsource-test-all-fields",
                "CopybookReaderAllFieldsTest", 5);
        verifySaleRecords(output, schema);
    }

    @Test
    public void testInvalidCopybookReaderSource() throws Exception {
        // 'copybookContents' is deliberately omitted, so deployment must fail.
        Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
                .put(Constants.Reference.REFERENCE_NAME, "TestCase")
                .put("binaryFilePath", "src/test/resources/DTAR020_FB.txt")
                .build();

        ETLStage source = new ETLStage("CopybookReader",
                new ETLPlugin("CopybookReader", BatchSource.PLUGIN_TYPE, sourceProperties, null));

        String outputDatasetName = "output-batchsource-test-incorrect-schema";
        ETLStage sink = new ETLStage("sink", MockSink.getPlugin(outputDatasetName));

        ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(source).addStage(sink)
                .addConnection(source.getName(), sink.getName()).build();

        AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
        Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "CopybookReaderInvalidTest");
        try {
            deployApplication(appId, appRequest);
            Assert.fail("Deployment should have failed: copybook contents are a required property");
        } catch (IllegalStateException e) {
            // expected - since cobol copybook is required
        }
    }

    @Test
    public void testDefaults() {
        // The default max split size is expressed in MB and converted to bytes by the config.
        CopybookSource.CopybookSourceConfig copybookSourceConfig = new CopybookSource.CopybookSourceConfig();
        Assert.assertEquals(Long.toString(CopybookSource.DEFAULT_MAX_SPLIT_SIZE_IN_MB * 1024 * 1024),
                copybookSourceConfig.getMaxSplitSize().toString());
    }

    @Test
    public void testDataTypes() throws Exception {
        // Copybook exercising every supported numeric storage format: COMP (binary), COMP-3
        // (packed decimal), COMP-1/COMP-2 (float/double), BINARY, zoned decimal, assumed
        // decimal, and plain display numerics.
        String copybook = "000100*                                                                         \n"
                + "000200*   DTAR020 IS THE OUTPUT FROM DTAB020 FROM THE IML                       \n"
                + "000300*   CENTRAL REPORTING SYSTEM                                              \n"
                + "000400*                                                                         \n"
                + "000500*   CREATED BY BRUCE ARTHUR  19/12/90                                     \n"
                + "000600*                                                                         \n"
                + "000700*   RECORD LENGTH IS 27.                                                  \n"
                + "000800*                                                                         \n"
                + "000900        03  DTAR020-KCODE-STORE-KEY.                                      \n"
                + "001000            05 DTAR020-KEYCODE      \tPIC X(01).    \n"
                + "001100            05 DTAR020-STORE-NO        \tPIC 9(18) \t\tCOMP.               \n"
                + "001200        03  DTAR020-DATE               \tPIC S9(08)  \tCOMP-3.               \n"
                + "001300        03  DTAR020-DEPT-NO            \tpic S9(5)   \tBINARY.               \n"
                + "001400        03  DTAR020-QTY-SOLD           \tPIC S9(9)   \tCOMP-3.               \n"
                + "001500        03  DTAR020-SALE-PRICE         \tPIC S9(9)V99 \tCOMP-1.    \n"
                + "001600        03  DTAR020-MRP         \t\t \tPIC S9(9)V99\tCOMP-2.      \n"
                + "001700        03  DTAR020-ZONED-DECIMAL      \tPIC S9(5)V9.   \n"
                + "001800        03  DTAR020-ASSUMED-DECIMAL    \tPIC 999V99.   \n"
                + "001900        03  DTAR020-NUM-RIGHTJUSTIFIED \tPIC 9(5).";

        Schema schema = Schema.recordOf("record",
                Schema.Field.of("DTAR020-KEYCODE", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
                Schema.Field.of("DTAR020-STORE-NO", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
                Schema.Field.of("DTAR020-DATE", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-DEPT-NO", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
                Schema.Field.of("DTAR020-QTY-SOLD", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-SALE-PRICE", Schema.nullableOf(Schema.of(Schema.Type.FLOAT))),
                Schema.Field.of("DTAR020-MRP", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-ZONED-DECIMAL", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-ASSUMED-DECIMAL", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
                Schema.Field.of("DTAR020-NUM-RIGHTJUSTIFIED", Schema.nullableOf(Schema.of(Schema.Type.INT))));

        Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
                .put(Constants.Reference.REFERENCE_NAME, "TestCase")
                .put("binaryFilePath", generateBinaryFile(copybook))
                .put("copybookContents", copybook)
                .build();

        List<StructuredRecord> output = deployAndRun(sourceProperties, "output-batchsource-test-data-types",
                "CopybookReaderDataTypesTest", 20);

        StructuredRecord record = output.get(0);

        Assert.assertEquals("1", record.get("DTAR020-KEYCODE"));
        Assert.assertEquals(123, record.get("DTAR020-NUM-RIGHTJUSTIFIED"));
        Assert.assertEquals(20L, record.get("DTAR020-STORE-NO"));
        Assert.assertEquals(20140202, (Double) record.get("DTAR020-DATE"), 0);
        Assert.assertEquals(100L, record.get("DTAR020-DEPT-NO"));
        Assert.assertEquals(7, (Double) record.get("DTAR020-QTY-SOLD"), 0);
        Assert.assertEquals(7.15F, (Float) record.get("DTAR020-SALE-PRICE"), 0);
        Assert.assertEquals(7.30, (Double) record.get("DTAR020-MRP"), 0);
        Assert.assertEquals(-123.2, (Double) record.get("DTAR020-ZONED-DECIMAL"), 0);
        Assert.assertEquals(134.25, (Double) record.get("DTAR020-ASSUMED-DECIMAL"), 0);

        Assert.assertEquals("Expected schema", schema, record.getSchema());
    }

    /**
     * Writes a single fixed-length binary record described by {@code copybook} into a temporary
     * file and returns the file's path. Field values match the assertions in
     * {@link #testDataTypes()}.
     *
     * @throws IllegalArgumentException if the copybook is invalid or the file cannot be written
     */
    public String generateBinaryFile(String copybook) {
        try {
            File file = temporaryFolder.newFile("TestFile.bin");
            String binaryFilePath = file.getPath();
            int fileStructure = net.sf.JRecord.Common.Constants.IO_FIXED_LENGTH;

            // Parse the copybook into a record layout; close the stream even if parsing fails.
            ExternalRecord externalRecord;
            try (InputStream inputStream = IOUtils.toInputStream(copybook, "UTF-8")) {
                externalRecord = CopybookIOUtils.getExternalRecord(inputStream);
            }
            LayoutDetail layout = ToLayoutDetail.getInstance().getLayout(externalRecord);

            AbstractLine saleRecord = new Line(layout);
            saleRecord.getFieldValue("DTAR020-KEYCODE").set(1);
            saleRecord.getFieldValue("DTAR020-NUM-RIGHTJUSTIFIED").set(123);
            saleRecord.getFieldValue("DTAR020-STORE-NO").set(20);
            saleRecord.getFieldValue("DTAR020-DATE").set(20140202);
            saleRecord.getFieldValue("DTAR020-DEPT-NO").set(100);
            saleRecord.getFieldValue("DTAR020-QTY-SOLD").set(7);
            saleRecord.getFieldValue("DTAR020-SALE-PRICE").set(7.15);
            saleRecord.getFieldValue("DTAR020-MRP").set(7.30);
            saleRecord.getFieldValue("DTAR020-ZONED-DECIMAL").set(-123.2);
            saleRecord.getFieldValue("DTAR020-ASSUMED-DECIMAL").set(134.25);

            // AbstractLineWriter is not AutoCloseable in this JRecord version; close via finally.
            AbstractLineWriter writer = LineIOProvider.getInstance().getLineWriter(fileStructure);
            writer.open(binaryFilePath);
            try {
                writer.write(saleRecord);
            } finally {
                writer.close();
            }
            return binaryFilePath;
        } catch (RecordException e) {
            throw new IllegalArgumentException("Invalid copybook contents: " + e.getMessage(), e);
        } catch (IOException e) {
            throw new IllegalArgumentException("Error creating binary test file: " + e.getMessage(), e);
        }
    }
}