org.apache.drill.TestFrameworkTest.java Source code

Introduction

Here is the source code for org.apache.drill.TestFrameworkTest.java.
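
This test class exercises Drill's own test framework (TestBuilder). Every test below follows the same fluent pattern: state a query, declare whether row order matters, supply a baseline (a CSV/TSV file, a JSON file, or inline values), then build and run the comparison. A minimal sketch of that pattern, using only calls that appear in the source below (the query and values here are placeholders):

    testBuilder()
        .sqlQuery("select 1 as col from (values(1))") // query under test
        .unOrdered()                                  // or .ordered() when row order matters
        .baselineColumns("col")                       // expected column names
        .baselineValues(1L)                           // one call per expected row
        .build().run();                               // execute and verify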

Source

/*******************************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package org.apache.drill;

import static org.apache.drill.TestBuilder.listOf;
import static org.apache.drill.TestBuilder.mapOf;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

import java.math.BigDecimal;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.hamcrest.CoreMatchers;
import org.junit.Test;

import com.google.common.collect.Lists;

// TODO - update the framework to remove any dependency on the Drill engine for reading baseline result sets.
// It is currently used on the assumption that the CSV and JSON readers are well tested; handling diverse
// types in the test framework itself would require redundant work to enable casting outside of Drill, or
// better tooling to generate Parquet files that cover all of the Parquet types.
public class TestFrameworkTest extends BaseTestQuery {

    private static final String CSV_COLS = " cast(columns[0] as bigint) employee_id, columns[1] as first_name, columns[2] as last_name ";

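    // The next three tests confirm that schemaBaseLine() cannot be combined with other
    // baseline specifications; each misuse is expected to trip an AssertionError.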
    @Test(expected = AssertionError.class)
    public void testSchemaTestBuilderSetInvalidBaselineValues() throws Exception {
        final String query = "SELECT ltrim('drill') as col FROM (VALUES(1)) limit 0";

        List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema = Lists.newArrayList();
        TypeProtos.MajorType majorType = TypeProtos.MajorType.newBuilder()
                .setMinorType(TypeProtos.MinorType.VARCHAR).setMode(TypeProtos.DataMode.REQUIRED).build();
        expectedSchema.add(Pair.of(SchemaPath.getSimplePath("col"), majorType));

        testBuilder().sqlQuery(query).schemaBaseLine(expectedSchema).baselineValues(new Object[0]).build().run();
    }

    @Test(expected = AssertionError.class)
    public void testSchemaTestBuilderSetInvalidBaselineRecords() throws Exception {
        final String query = "SELECT ltrim('drill') as col FROM (VALUES(1)) limit 0";

        List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema = Lists.newArrayList();
        TypeProtos.MajorType majorType = TypeProtos.MajorType.newBuilder()
                .setMinorType(TypeProtos.MinorType.VARCHAR).setMode(TypeProtos.DataMode.REQUIRED).build();
        expectedSchema.add(Pair.of(SchemaPath.getSimplePath("col"), majorType));

        testBuilder().sqlQuery(query).schemaBaseLine(expectedSchema)
                .baselineRecords(Collections.<Map<String, Object>>emptyList()).build().run();
    }

    @Test(expected = AssertionError.class)
    public void testSchemaTestBuilderSetInvalidBaselineColumns() throws Exception {
        final String query = "SELECT ltrim('drill') as col FROM (VALUES(1)) limit 0";

        List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema = Lists.newArrayList();
        TypeProtos.MajorType majorType = TypeProtos.MajorType.newBuilder()
                .setMinorType(TypeProtos.MinorType.VARCHAR).setMode(TypeProtos.DataMode.REQUIRED).build();
        expectedSchema.add(Pair.of(SchemaPath.getSimplePath("col"), majorType));

        testBuilder().sqlQuery(query).baselineColumns("col").schemaBaseLine(expectedSchema).build().run();
    }

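    // Verifies query results against a TSV baseline file with explicit column types.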
    @Test
    public void testCSVVerification() throws Exception {
        testBuilder()
                .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
                .ordered().csvBaselineFile("testframework/small_test_data.tsv")
                .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR,
                        TypeProtos.MinorType.VARCHAR)
                .baselineColumns("employee_id", "first_name", "last_name").build().run();
    }

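    // Baseline rows can also be supplied inline, checked here with both ordered and
    // unordered comparison.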
    @Test
    public void testBaselineValsVerification() throws Exception {
        testBuilder().sqlQuery(
                "select employee_id, first_name, last_name from cp.`testframework/small_test_data.json` limit 1")
                .ordered().baselineColumns("employee_id", "first_name", "last_name")
                .baselineValues(12L, "Jewel", "Creek").build().run();

        testBuilder().sqlQuery(
                "select employee_id, first_name, last_name from cp.`testframework/small_test_data.json` limit 1")
                .unOrdered().baselineColumns("employee_id", "first_name", "last_name")
                .baselineValues(12L, "Jewel", "Creek").build().run();
    }

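    // Decimal baselines need the decimal data type enabled for the session; the finally
    // block switches it back off.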
    @Test
    public void testDecimalBaseline() throws Exception {
        try {
            test(String.format("alter session set `%s` = true", PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY));

            // type information can be provided explicitly
            testBuilder().sqlQuery(
                    "select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`")
                    .unOrdered().csvBaselineFile("testframework/decimal_test.tsv")
                    .baselineTypes(Types.withScaleAndPrecision(TypeProtos.MinorType.DECIMAL38SPARSE,
                            TypeProtos.DataMode.REQUIRED, 2, 38))
                    .baselineColumns("dec_col").build().run();

            // Type information can also be left out; the result types of the test query then
            // drive the interpretation of the baseline file.
            testBuilder().sqlQuery(
                    "select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`")
                    .unOrdered().csvBaselineFile("testframework/decimal_test.tsv").baselineColumns("dec_col")
                    .build().run();

            // Or provide explicit values to the builder itself, avoiding the Drill engine entirely
            // when populating the baseline results.
            testBuilder().sqlQuery(
                    "select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`")
                    .unOrdered().baselineColumns("dec_col").baselineValues(new BigDecimal("3.70")).build().run();
        } finally {
            test(String.format("alter session set `%s` = false", PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY));
        }
    }

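    // Maps are compared by content, so a baseline whose map keys appear in a different
    // order still matches.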
    @Test
    public void testMapOrdering() throws Exception {
        testBuilder().sqlQuery("select * from cp.`/testframework/map_reordering.json`").unOrdered()
                .jsonBaselineFile("testframework/map_reordering2.json").build().run();
    }

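    // Null values are allowed in baseline rows; the unordered variant also tolerates
    // listing the expected rows in a different order than the data file.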
    @Test
    public void testBaselineValsVerificationWithNulls() throws Exception {
        testBuilder().sqlQuery("select * from cp.`store/json/json_simple_with_null.json`").ordered()
                .baselineColumns("a", "b").baselineValues(5l, 10l).baselineValues(7l, null)
                .baselineValues(null, null).baselineValues(9l, 11l).build().run();

        testBuilder().sqlQuery("select * from cp.`store/json/json_simple_with_null.json`").unOrdered()
                .baselineColumns("a", "b").baselineValues(5l, 10l).baselineValues(9l, 11l).baselineValues(7l, null)
                .baselineValues(null, null).build().run();
    }

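    // The mapOf() and listOf() helpers build nested baseline values for complex columns.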
    @Test
    public void testBaselineValsVerificationWithComplexAndNulls() throws Exception {
        testBuilder().sqlQuery("select * from cp.`/jsoninput/input2.json` limit 1").ordered()
                .baselineColumns("integer", "float", "x", "z", "l", "rl")
                .baselineValues(2010L, 17.4, mapOf("y", "kevin", "z", "paul"),
                        listOf(mapOf("orange", "yellow", "pink", "red"), mapOf("pink", "purple")), listOf(4L, 2L),
                        listOf(listOf(2L, 1L), listOf(4L, 6L)))
                .build().run();
    }

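    // This test and the failure tests that follow confirm the framework reports mismatches
    // (row count, columns, types, order) instead of passing silently.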
    @Test
    public void testCSVVerification_missing_records_fails() throws Exception {
        try {
            testBuilder().sqlQuery(
                    "select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
                    .ordered().csvBaselineFile("testframework/small_test_data_extra.tsv")
                    .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR,
                            TypeProtos.MinorType.VARCHAR)
                    .baselineColumns("employee_id", "first_name", "last_name").build().run();
        } catch (AssertionError ex) {
            assertEquals("Incorrect number of rows returned by query. expected:<7> but was:<5>", ex.getMessage());
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on missing records.");
    }

    @Test
    public void testCSVVerification_extra_records_fails() throws Exception {
        try {
            testBuilder().sqlQuery("select " + CSV_COLS + " from cp.`testframework/small_test_data_extra.tsv`")
                    .ordered().csvBaselineFile("testframework/small_test_data.tsv")
                    .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR,
                            TypeProtos.MinorType.VARCHAR)
                    .baselineColumns("employee_id", "first_name", "last_name").build().run();
        } catch (AssertionError ex) {
            assertEquals("Incorrect number of rows returned by query. expected:<5> but was:<7>", ex.getMessage());
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure for extra records.");
    }

    @Test
    public void testCSVVerification_extra_column_fails() throws Exception {
        try {
            testBuilder()
                    .sqlQuery("select " + CSV_COLS
                            + ", columns[3] as address from cp.`testframework/small_test_data_extra_col.tsv`")
                    .ordered().csvBaselineFile("testframework/small_test_data.tsv")
                    .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR,
                            TypeProtos.MinorType.VARCHAR)
                    .baselineColumns("employee_id", "first_name", "last_name").build().run();
        } catch (AssertionError ex) {
            assertEquals("Unexpected extra column `address` returned by query.", ex.getMessage());
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on extra column.");
    }

    @Test
    public void testCSVVerification_missing_column_fails() throws Exception {
        try {
            testBuilder().sqlQuery(
                    "select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
                    .ordered().csvBaselineFile("testframework/small_test_data_extra_col.tsv")
                    .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR,
                            TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
                    .baselineColumns("employee_id", "first_name", "last_name", "address").build().run();
        } catch (Exception ex) {
            assertTrue(ex.getMessage(),
                    ex.getMessage().startsWith("Expected column(s) `address`,  not found in result set"));
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on missing column.");
    }

    @Test
    public void testCSVVerificationOfTypes() throws Throwable {
        try {
            testBuilder().sqlQuery(
                    "select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
                    .ordered().csvBaselineFile("testframework/small_test_data.tsv")
                    .baselineTypes(TypeProtos.MinorType.INT, TypeProtos.MinorType.VARCHAR,
                            TypeProtos.MinorType.VARCHAR)
                    .baselineColumns("employee_id", "first_name", "last_name").build().run();
        } catch (Exception ex) {
            assertThat(ex.getMessage(), CoreMatchers.containsString(
                    "at position 0 column '`employee_id`' mismatched values, expected: 12(Integer) but received 12(Long)"));
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on type check.");
    }

    @Test
    public void testCSVVerificationOfOrder_checkFailure() throws Throwable {
        try {
            testBuilder().sqlQuery(
                    "select columns[0] as employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
                    .ordered().csvBaselineFile("testframework/small_test_data.tsv")
                    .baselineColumns("employee_id", "first_name", "last_name").build().run();
        } catch (Exception ex) {
            assertThat(ex.getMessage(), CoreMatchers.containsString(
                    "at position 0 column '`employee_id`' mismatched values, expected: 12(String) but received 16(String)"));
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on order check.");
    }

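    // The unordered comparison succeeds even though the data file's rows are reordered
    // relative to the baseline.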
    @Test
    public void testCSVVerificationOfUnorderedComparison() throws Throwable {
        testBuilder().sqlQuery(
                "select columns[0] as employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
                .unOrdered().csvBaselineFile("testframework/small_test_data.tsv")
                .baselineColumns("employee_id", "first_name", "last_name").build().run();
    }

    // TODO - enable more advanced type handling for JSON; currently only basic support works.
    // Add support for type information taken from the test query, or for explicit type expectations.
    @Test
    public void testBasicJSON() throws Exception {
        testBuilder().sqlQuery("select * from cp.`scan_json_test_3.json`").ordered()
                .jsonBaselineFile("/scan_json_test_3.json").build().run();

        testBuilder().sqlQuery("select * from cp.`scan_json_test_3.json`").unOrdered() // Check other verification method with same files
                .jsonBaselineFile("/scan_json_test_3.json").build().run();
    }

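    // optionSettingQueriesForTestQuery() and optionSettingQueriesForBaseline() run setup
    // statements before the test query and before the baseline read, respectively.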
    @Test
    public void testComplexJSON_all_text() throws Exception {
        try {
            testBuilder().sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
                    .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true").ordered()
                    .jsonBaselineFile("store/json/schema_change_int_to_string.json")
                    .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true").build()
                    .run();

            testBuilder().sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
                    .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true").unOrdered() // check the other verification method with the same files
                    .jsonBaselineFile("store/json/schema_change_int_to_string.json")
                    .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true").build()
                    .run();
        } finally {
            // reset the system option even on failure, mirroring the try/finally in testDecimalBaseline
            test("alter system set `store.json.all_text_mode` = false");
        }
    }

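    // A mismatch inside a repeated (list) column should be detected and reported.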
    @Test
    public void testRepeatedColumnMatching() throws Exception {
        try {
            testBuilder().sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
                    .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
                    .ordered().jsonBaselineFile("testframework/schema_change_int_to_string_non-matching.json")
                    .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true").build()
                    .run();
        } catch (Exception ex) {
            assertThat(ex.getMessage(),
                    CoreMatchers.containsString("at position 1 column '`field_1`' mismatched values, "
                            + "expected: [\"5\",\"2\",\"3\",\"4\",\"1\",\"2\"](JsonStringArrayList) but received [\"5\"](JsonStringArrayList)"));
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on order check.");
    }

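    // expectsEmptyResultSet() asserts that the query returns no rows at all.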
    @Test
    public void testEmptyResultSet() throws Exception {
        testBuilder().sqlQuery("select * from cp.`store/json/json_simple_with_null.json` where 1=0")
                .expectsEmptyResultSet().build().run();
        try {
            testBuilder().sqlQuery("select * from cp.`store/json/json_simple_with_null.json`")
                    .expectsEmptyResultSet().build().run();
        } catch (AssertionError ex) {
            assertEquals("Different number of records returned expected:<0> but was:<4>", ex.getMessage());
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on unexpected records.");
    }

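    // Column types can also be supplied as a map from column path to MajorType.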
    @Test
    public void testCSVVerificationTypeMap() throws Throwable {
        Map<SchemaPath, TypeProtos.MajorType> typeMap = new HashMap<>();
        typeMap.put(TestBuilder.parsePath("first_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
        typeMap.put(TestBuilder.parsePath("employee_id"), Types.optional(TypeProtos.MinorType.INT));
        typeMap.put(TestBuilder.parsePath("last_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
        testBuilder().sqlQuery(
                "select cast(columns[0] as int) employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
                .unOrdered().csvBaselineFile("testframework/small_test_data.tsv")
                .baselineColumns("employee_id", "first_name", "last_name")
                // This passes without the call below because default casts are added based on the
                // types produced by the test query. To enforce strict typing, pass type information
                // explicitly: either a CSV baseline with a list of types, or any format with a map
                // of types like the one constructed above, via the call commented out here.
                //.baselineTypes(typeMap)
                .build().run();

        typeMap.clear();
        typeMap.put(TestBuilder.parsePath("first_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
        // employee_id is intentionally given the wrong type to ensure the failure is detected
        typeMap.put(TestBuilder.parsePath("employee_id"), Types.optional(TypeProtos.MinorType.VARCHAR));
        typeMap.put(TestBuilder.parsePath("last_name"), Types.optional(TypeProtos.MinorType.VARCHAR));

        try {
            testBuilder().sqlQuery(
                    "select cast(columns[0] as int) employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
                    .unOrdered().csvBaselineFile("testframework/small_test_data.tsv")
                    .baselineColumns("employee_id", "first_name", "last_name").baselineTypes(typeMap).build().run();
        } catch (Exception ex) {
            // this indicates successful completion of the test
            return;
        }
        throw new Exception("Test framework verification failed, expected failure on type check.");
    }

}