org.apache.nifi.processors.kite.TestConvertAvroSchema.java Source code

Introduction

Here is the source code for org.apache.nifi.processors.kite.TestConvertAvroSchema.java, the unit test class for the NiFi ConvertAvroSchema processor in the Kite bundle.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.nifi.processors.kite;

import static org.apache.nifi.processors.kite.TestUtil.streamFor;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.List;
import java.util.Locale;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.commons.lang.LocaleUtils;
import org.apache.nifi.processors.kite.AbstractKiteConvertProcessor.CodecType;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Assert;
import org.junit.Test;

import com.google.common.collect.Lists;

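/**
 * Unit tests for the ConvertAvroSchema processor: a straightforward field
 * mapping, the same conversion with BZIP2 output compression, locale-aware
 * parsing of numeric strings, and flattening of a nested record field.
 */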
public class TestConvertAvroSchema {

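    // The input schema carries every field as a string; the output schema expects
    // a long id and an optional double price, so a non-numeric price ("free")
    // is expected to fail conversion.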
    public static final Schema INPUT_SCHEMA = SchemaBuilder.record("InputTest").fields().requiredString("id")
            .requiredString("primaryColor").optionalString("secondaryColor").optionalString("price").endRecord();

    public static final Schema OUTPUT_SCHEMA = SchemaBuilder.record("Test").fields().requiredLong("id")
            .requiredString("color").optionalDouble("price").endRecord();

    public static final String MAPPING = "[{\"source\":\"primaryColor\", \"target\":\"color\"}]";

    public static final String FAILURE_SUMMARY = "Cannot convert free to double";

    @Test
    public void testBasicConversion() throws IOException {
        TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
        runner.assertNotValid();
        runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
        runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
        Locale locale = Locale.getDefault();
        runner.setProperty("primaryColor", "color");
        runner.assertValid();

        NumberFormat format = NumberFormat.getInstance(locale);

        // Two valid rows, and one invalid because "free" is not a double.
        Record goodRecord1 = dataBasic("1", "blue", null, null);
        Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
        Record badRecord = dataBasic("3", "red", "yellow", "free");
        List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);

        runner.enqueue(streamFor(input));
        runner.run();

        long converted = runner.getCounterValue("Converted records");
        long errors = runner.getCounterValue("Conversion errors");
        Assert.assertEquals("Should convert 2 rows", 2, converted);
        Assert.assertEquals("Should reject 1 rows", 1, errors);

        runner.assertTransferCount("success", 1);
        runner.assertTransferCount("failure", 1);

        MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
        GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
        DataFileStream<Record> stream = new DataFileStream<Record>(
                new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader);
        int count = 0;
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
        stream.close();
        Assert.assertEquals(1, count);
        Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY,
                incompatible.getAttribute("errors"));

        GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
        DataFileStream<Record> successStream = new DataFileStream<Record>(
                new ByteArrayInputStream(
                        runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))),
                successReader);
        count = 0;
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
        successStream.close();
        Assert.assertEquals(2, count);
    }

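    // Same flow as testBasicConversion, but with BZIP2 compression enabled on
    // the processor's output.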
    @Test
    public void testBasicConversionWithCompression() throws IOException {
        TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
        runner.assertNotValid();
        runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
        runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
        runner.setProperty(AbstractKiteConvertProcessor.COMPRESSION_TYPE, CodecType.BZIP2.toString());
        Locale locale = Locale.getDefault();
        runner.setProperty("primaryColor", "color");
        runner.assertValid();

        NumberFormat format = NumberFormat.getInstance(locale);

        // Two valid rows, and one invalid because "free" is not a double.
        Record goodRecord1 = dataBasic("1", "blue", null, null);
        Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
        Record badRecord = dataBasic("3", "red", "yellow", "free");
        List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);

        runner.enqueue(streamFor(input));
        runner.run();

        long converted = runner.getCounterValue("Converted records");
        long errors = runner.getCounterValue("Conversion errors");
        Assert.assertEquals("Should convert 2 rows", 2, converted);
        Assert.assertEquals("Should reject 1 rows", 1, errors);

        runner.assertTransferCount("success", 1);
        runner.assertTransferCount("failure", 1);

        MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
        GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
        DataFileStream<Record> stream = new DataFileStream<Record>(
                new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader);
        int count = 0;
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
        stream.close();
        Assert.assertEquals(1, count);
        Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY,
                incompatible.getAttribute("errors"));

        GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
        DataFileStream<Record> successStream = new DataFileStream<Record>(
                new ByteArrayInputStream(
                        runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))),
                successReader);
        count = 0;
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
        successStream.close();
        Assert.assertEquals(2, count);
    }

    @Test
    public void testBasicConversionWithLocales() throws IOException {
        testBasicConversionWithLocale("en_US");
        testBasicConversionWithLocale("fr_FR");
    }

    public void testBasicConversionWithLocale(String localeString) throws IOException {
        TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
        runner.assertNotValid();
        runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
        runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
        Locale locale = LocaleUtils.toLocale(localeString);
        runner.setProperty(ConvertAvroSchema.LOCALE, localeString);
        runner.setProperty("primaryColor", "color");
        runner.assertValid();

        NumberFormat format = NumberFormat.getInstance(locale);

        // Two valid rows, and one invalid because "free" is not a double.
        Record goodRecord1 = dataBasic("1", "blue", null, null);
        Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
        Record badRecord = dataBasic("3", "red", "yellow", "free");
        List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);

        runner.enqueue(streamFor(input));
        runner.run();

        long converted = runner.getCounterValue("Converted records");
        long errors = runner.getCounterValue("Conversion errors");
        Assert.assertEquals("Should convert 2 rows", 2, converted);
        Assert.assertEquals("Should reject 1 rows", 1, errors);

        runner.assertTransferCount("success", 1);
        runner.assertTransferCount("failure", 1);

        MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
        GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
        DataFileStream<Record> stream = new DataFileStream<Record>(
                new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader);
        int count = 0;
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
        stream.close();
        Assert.assertEquals(1, count);
        Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY,
                incompatible.getAttribute("errors"));

        GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
        DataFileStream<Record> successStream = new DataFileStream<Record>(
                new ByteArrayInputStream(
                        runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))),
                successReader);
        count = 0;
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
        successStream.close();
        Assert.assertEquals(2, count);
    }

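    // Flattens a nested record: the dynamic property "parent.id" -> "parentId"
    // maps a field of the nested "parent" record onto a top-level output field.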
    @Test
    public void testNestedConversion() throws IOException {
        TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
        runner.assertNotValid();
        runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, TestAvroRecordConverter.NESTED_RECORD_SCHEMA.toString());
        runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA,
                TestAvroRecordConverter.UNNESTED_OUTPUT_SCHEMA.toString());
        runner.setProperty("parent.id", "parentId");
        runner.assertValid();

        // Two valid rows
        Record goodRecord1 = dataNested(1L, "200", null, null);
        Record goodRecord2 = dataNested(2L, "300", 5L, "ParentCompany");
        List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2);

        runner.enqueue(streamFor(input));
        runner.run();

        long converted = runner.getCounterValue("Converted records");
        long errors = runner.getCounterValue("Conversion errors");
        Assert.assertEquals("Should convert 2 rows", 2, converted);
        Assert.assertEquals("Should reject 0 rows", 0, errors);

        runner.assertTransferCount("success", 1);
        runner.assertTransferCount("failure", 0);

        GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(
                TestAvroRecordConverter.UNNESTED_OUTPUT_SCHEMA);
        DataFileStream<Record> successStream = new DataFileStream<Record>(
                new ByteArrayInputStream(
                        runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))),
                successReader);
        int count = 0;
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertNested(goodRecord1), r);
            } else {
                Assert.assertEquals(convertNested(goodRecord2), r);
            }
            count++;
        }
        successStream.close();
        Assert.assertEquals(2, count);
    }

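    // Builds the expected output record for an input record, parsing the price
    // with the supplied locale's NumberFormat, mirroring the conversion the
    // tests expect the processor to perform.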
    private Record convertBasic(Record inputRecord, Locale locale) {
        Record result = new Record(OUTPUT_SCHEMA);
        result.put("id", Long.parseLong(inputRecord.get("id").toString()));
        result.put("color", inputRecord.get("primaryColor").toString());
        if (inputRecord.get("price") == null) {
            result.put("price", null);
        } else {
            final NumberFormat format = NumberFormat.getInstance(locale);
            double price;
            try {
                price = format.parse(inputRecord.get("price").toString()).doubleValue();
            } catch (ParseException e) {
                // Shouldn't happen
                throw new RuntimeException(e);
            }
            result.put("price", price);
        }
        return result;
    }

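    // Builds an input record in which every field, including price, is a string.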
    private Record dataBasic(String id, String primaryColor, String secondaryColor, String price) {
        Record result = new Record(INPUT_SCHEMA);
        result.put("id", id);
        result.put("primaryColor", primaryColor);
        result.put("secondaryColor", secondaryColor);
        result.put("price", price);
        return result;
    }

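    // Builds the expected flattened output record, copying parent.id into the
    // top-level parentId field when a parent record is present.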
    private Record convertNested(Record inputRecord) {
        Record result = new Record(TestAvroRecordConverter.UNNESTED_OUTPUT_SCHEMA);
        result.put("l1", inputRecord.get("l1"));
        result.put("s1", Long.parseLong(inputRecord.get("s1").toString()));
        if (inputRecord.get("parent") != null) {
            // output schema doesn't have parent name.
            result.put("parentId", ((Record) inputRecord.get("parent")).get("id"));
        }
        return result;
    }

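    // Builds a nested input record, attaching a parent sub-record only when a
    // parent id or name is supplied.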
    private Record dataNested(long id, String companyName, Long parentId, String parentName) {
        Record result = new Record(TestAvroRecordConverter.NESTED_RECORD_SCHEMA);
        result.put("l1", id);
        result.put("s1", companyName);
        if (parentId != null || parentName != null) {
            Record parent = new Record(TestAvroRecordConverter.NESTED_PARENT_SCHEMA);
            parent.put("id", parentId);
            parent.put("name", parentName);
            result.put("parent", parent);
        }
        return result;
    }
}
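
The locale tests above hinge on java.text.NumberFormat producing and parsing different decimal separators per locale. Below is a minimal standalone sketch of that JDK behavior (hypothetical class name, plain JDK, no NiFi dependencies), which is what the en_US and fr_FR runs rely on:

import java.text.NumberFormat;
import java.text.ParseException;
import java.util.Locale;

public class LocalePriceSketch {
    public static void main(String[] args) throws ParseException {
        // en_US renders 5.5 as "5.5"; fr_FR renders it as "5,5".
        NumberFormat us = NumberFormat.getInstance(Locale.US);
        NumberFormat fr = NumberFormat.getInstance(Locale.FRANCE);
        String usPrice = us.format(5.5);
        String frPrice = fr.format(5.5);
        System.out.println(usPrice + " / " + frPrice);

        // Parsing round-trips only with the matching locale, which is why the
        // test formats and parses the price with the same Locale instance.
        System.out.println(us.parse(usPrice).doubleValue()); // 5.5
        System.out.println(fr.parse(frPrice).doubleValue()); // 5.5
    }
}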