org.apache.drill.exec.hive.TestHiveStorage.java Source code

Introduction

Here is the source code for org.apache.drill.exec.hive.TestHiveStorage.java, a JUnit test class that exercises Drill's Hive storage plugin.
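
Every test in the class drives the same fluent builder from Drill's test framework. As a minimal orientation sketch, reusing the hive.kv table and row values that appear in the tests below:

    testBuilder()
            .sqlQuery("SELECT * FROM hive.kv ORDER BY key LIMIT 1") // query under test
            .ordered()                               // rows must match the baseline in order
            .baselineColumns("key", "value")         // expected column names
            .baselineValues(1, " key_1")             // one expected row per baselineValues(...) call
            .go();                                   // execute the query and verify the baseline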

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.hive;

import com.google.common.collect.ImmutableMap;
import org.apache.drill.common.exceptions.UserRemoteException;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.hadoop.fs.FileSystem;
import org.joda.time.DateTime;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import java.math.BigDecimal;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.Map;

import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;

public class TestHiveStorage extends HiveTestBase {
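    // Several baselines below compare against BigDecimal values, so enable the decimal data type
    // planner option for the whole session before any test runs.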
    @BeforeClass
    public static void setupOptions() throws Exception {
        test(String.format("alter session set `%s` = true", PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY));
    }

    @Test // DRILL-4083
    public void testNativeScanWhenNoColumnIsRead() throws Exception {
        try {
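            // DRILL-4083: a count(*) query projects no columns at all; the native Parquet reader
            // must still be used for the scan and return the correct row count.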
            test(String.format("alter session set `%s` = true",
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));

            String query = "SELECT count(*) as col FROM hive.countStar_Parquet";
            testPhysicalPlan(query, "hive-drill-native-parquet-scan");

            testBuilder().sqlQuery(query).unOrdered().baselineColumns("col").baselineValues(200L).go();
        } finally {
            test(String.format("alter session set `%s` = %s", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS,
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS_VALIDATOR.getDefault().bool_val ? "true"
                            : "false"));
        }
    }

    @Test
    public void hiveReadWithDb() throws Exception {
        test("select * from hive.kv");
    }

    @Test
    public void queryEmptyHiveTable() throws Exception {
        testBuilder().sqlQuery("SELECT * FROM hive.empty_table").expectsEmptyResultSet().go();
    }

    @Test // DRILL-3328
    public void convertFromOnHiveBinaryType() throws Exception {
        testBuilder().sqlQuery("SELECT convert_from(binary_field, 'UTF8') col1 from hive.readtest").unOrdered()
                .baselineColumns("col1").baselineValues("binaryfield").baselineValues(new Object[] { null }).go();
    }

    /**
     * Test to ensure Drill reads all supported Hive data types correctly, both normal fields
     * (converted to nullable types) and partition fields (converted to required types).
     * @throws Exception
     */
    @Test
    public void readAllSupportedHiveDataTypes() throws Exception {
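        // Hive TIMESTAMP and DATE values are expected as Joda-Time DateTime objects in the baselines.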
        testBuilder().sqlQuery("SELECT * FROM hive.readtest").unOrdered()
                .baselineColumns("binary_field", "boolean_field", "tinyint_field", "decimal0_field",
                        "decimal9_field", "decimal18_field", "decimal28_field", "decimal38_field", "double_field",
                        "float_field", "int_field", "bigint_field", "smallint_field", "string_field",
                        "varchar_field", "timestamp_field", "date_field", "char_field",
                        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
                        //"binary_part",
                        "boolean_part", "tinyint_part", "decimal0_part", "decimal9_part", "decimal18_part",
                        "decimal28_part", "decimal38_part", "double_part", "float_part", "int_part", "bigint_part",
                        "smallint_part", "string_part", "varchar_part", "timestamp_part", "date_part", "char_part")
                .baselineValues("binaryfield".getBytes(), false, 34, new BigDecimal("66"),
                        new BigDecimal("2347.92"), new BigDecimal("2758725827.99990"),
                        new BigDecimal("29375892739852.8"), new BigDecimal("89853749534593985.783"), 8.345d, 4.67f,
                        123456, 234235L, 3455, "stringfield", "varcharfield",
                        new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
                        new DateTime(Date.valueOf("2013-07-05").getTime()), "charfield",
                        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
                        //"binary",
                        true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"),
                        new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f,
                        123456, 234235L, 3455, "string", "varchar",
                        new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
                        new DateTime(Date.valueOf("2013-07-05").getTime()), "char")
                .baselineValues( // All fields are null, but partition fields have non-null values
                        null, null, null, null, null, null, null, null, null, null, null, null, null, null, null,
                        null, null, null,
                        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
                        //"binary",
                        true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"),
                        new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f,
                        123456, 234235L, 3455, "string", "varchar",
                        new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
                        new DateTime(Date.valueOf("2013-07-05").getTime()), "char")
                .go();
    }

    /**
     * Test to ensure Drill reads all supported Hive data types through the native Parquet readers.
     * NOTE: As part of the Hive 1.2 upgrade, make sure this test and {@link #readAllSupportedHiveDataTypes()} are
     * merged into one test.
     */
    @Test
    public void readAllSupportedHiveDataTypesNativeParquet() throws Exception {
        try {
            test(String.format("alter session set `%s` = true",
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));
            final String query = "SELECT * FROM hive.readtest_parquet";

            // Make sure the plan has Hive scan with native parquet reader
            testPhysicalPlan(query, "hive-drill-native-parquet-scan");

            testBuilder().sqlQuery(query).unOrdered().baselineColumns("binary_field", "boolean_field",
                    "tinyint_field", "decimal0_field", "decimal9_field", "decimal18_field", "decimal28_field",
                    "decimal38_field", "double_field", "float_field", "int_field", "bigint_field", "smallint_field",
                    "string_field", "varchar_field", "timestamp_field", "char_field",
                    // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now.
                    //"binary_part",
                    "boolean_part", "tinyint_part", "decimal0_part", "decimal9_part", "decimal18_part",
                    "decimal28_part", "decimal38_part", "double_part", "float_part", "int_part", "bigint_part",
                    "smallint_part", "string_part", "varchar_part", "timestamp_part", "date_part", "char_part")
                    .baselineValues("binaryfield".getBytes(), false, 34, new BigDecimal("66"),
                            new BigDecimal("2347.92"), new BigDecimal("2758725827.99990"),
                            new BigDecimal("29375892739852.8"), new BigDecimal("89853749534593985.783"), 8.345d,
                            4.67f, 123456, 234235L, 3455, "stringfield", "varcharfield",
                            new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), "charfield",
                            // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now.
                            //"binary",
                            true, 64, new BigDecimal("37"), new BigDecimal("36.90"),
                            new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"),
                            new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455,
                            "string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
                            new DateTime(Date.valueOf("2013-07-05").getTime()), "char")
                    .baselineValues( // All fields are null, but partition fields have non-null values
                            null, null, null, null, null, null, null, null, null, null, null, null, null, null,
                            null, null, null,
                            // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now.
                            //"binary",
                            true, 64, new BigDecimal("37"), new BigDecimal("36.90"),
                            new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"),
                            new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455,
                            "string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
                            new DateTime(Date.valueOf("2013-07-05").getTime()), "char")
                    .go();
        } finally {
            test(String.format("alter session set `%s` = false",
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));
        }
    }

    @Test
    public void orderByOnHiveTable() throws Exception {
        testBuilder().sqlQuery("SELECT * FROM hive.kv ORDER BY `value` DESC").ordered()
                .baselineColumns("key", "value").baselineValues(5, " key_5").baselineValues(4, " key_4")
                .baselineValues(3, " key_3").baselineValues(2, " key_2").baselineValues(1, " key_1").go();
    }

    @Test
    public void queryingTablesInNonDefaultFS() throws Exception {
        // Update the default FS settings in Hive test storage plugin to non-local FS
        hiveTest.updatePluginConfig(getDrillbitContext().getStorage(),
                ImmutableMap.of(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost:9001"));

        testBuilder().sqlQuery("SELECT * FROM hive.`default`.kv LIMIT 1").unOrdered()
                .baselineColumns("key", "value").baselineValues(1, " key_1").go();
    }

    @Test // DRILL-745
    public void queryingHiveAvroTable() throws Exception {
        testBuilder().sqlQuery("SELECT * FROM hive.db1.avro ORDER BY key DESC LIMIT 1").unOrdered()
                .baselineColumns("key", "value").baselineValues(5, " key_5").go();
    }

    @Test // DRILL-3266
    public void queryingTableWithSerDeInHiveContribJar() throws Exception {
        testBuilder().sqlQuery("SELECT * FROM hive.db1.kv_db1 ORDER BY key DESC LIMIT 1").unOrdered()
                .baselineColumns("key", "value").baselineValues("5", " key_5").go();
    }

    @Test // DRILL-3746
    public void readFromPartitionWithCustomLocation() throws Exception {
        testBuilder()
                .sqlQuery("SELECT count(*) as cnt FROM hive.partition_pruning_test WHERE c=99 AND d=98 AND e=97")
                .unOrdered().baselineColumns("cnt").baselineValues(1L).go();
    }

    @Test // DRILL-3938
    public void readFromAlteredPartitionedTable() throws Exception {
        testBuilder().sqlQuery("SELECT key, `value`, newcol FROM hive.kv_parquet ORDER BY key LIMIT 1").unOrdered()
                .baselineColumns("key", "value", "newcol").baselineValues(1, " key_1", null).go();
    }

    @Test // DRILL-3938
    public void nativeReaderIsDisabledForAlteredPartitionedTable() throws Exception {
        try {
            test(String.format("alter session set `%s` = true",
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));
            final String query = "EXPLAIN PLAN FOR SELECT key, `value`, newcol FROM hive.kv_parquet ORDER BY key LIMIT 1";

            // Make sure the HiveScan in plan has no native parquet reader
            final String planStr = getPlanInString(query, JSON_FORMAT);
            assertFalse("Native Parquet scan is not expected in the plan",
                    planStr.contains("hive-drill-native-parquet-scan"));
        } finally {
            test(String.format("alter session set `%s` = false",
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));
        }
    }

    @Test // DRILL-3739
    public void readingFromStorageHandleBasedTable() throws Exception {
        testBuilder().sqlQuery("SELECT * FROM hive.kv_sh ORDER BY key LIMIT 2").ordered()
                .baselineColumns("key", "value").expectsEmptyResultSet().go();
    }

    @Test // DRILL-3739
    public void readingFromStorageHandleBasedTable2() throws Exception {
        try {
            test(String.format("alter session set `%s` = true",
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));

            testBuilder().sqlQuery("SELECT * FROM hive.kv_sh ORDER BY key LIMIT 2").ordered()
                    .baselineColumns("key", "value").expectsEmptyResultSet().go();
        } finally {
            test(String.format("alter session set `%s` = false",
                    ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));
        }
    }

    @Test // DRILL-3688
    public void readingFromSmallTableWithSkipHeaderAndFooter() throws Exception {
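        // The hive.skipper tables declare skip.header.line.count and skip.footer.line.count, so the
        // header and footer lines must not appear in query results.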
        testBuilder().sqlQuery("select key, `value` from hive.skipper.kv_text_small order by key asc").ordered()
                .baselineColumns("key", "value").baselineValues(1, "key_1").baselineValues(2, "key_2")
                .baselineValues(3, "key_3").baselineValues(4, "key_4").baselineValues(5, "key_5").go();

        testBuilder().sqlQuery("select count(1) as cnt from hive.skipper.kv_text_small").unOrdered()
                .baselineColumns("cnt").baselineValues(5L).go();
    }

    @Test // DRILL-3688
    public void readingFromLargeTableWithSkipHeaderAndFooter() throws Exception {
        testBuilder().sqlQuery("select sum(key) as sum_keys from hive.skipper.kv_text_large").unOrdered()
                .baselineColumns("sum_keys").baselineValues((long) (5000 * (5000 + 1) / 2)).go();

        testBuilder().sqlQuery("select count(1) as cnt from hive.skipper.kv_text_large").unOrdered()
                .baselineColumns("cnt").baselineValues(5000L).go();
    }

    @Test // DRILL-3688
    public void testIncorrectHeaderFooterProperty() throws Exception {
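        // Each entry maps a deliberately misconfigured table to the table property whose value ('A')
        // is non-numeric in that table's definition.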
        Map<String, String> testData = ImmutableMap.<String, String>builder()
                .put("hive.skipper.kv_incorrect_skip_header", "skip.header.line.count")
                .put("hive.skipper.kv_incorrect_skip_footer", "skip.footer.line.count").build();

        String query = "select * from %s";
        String exceptionMessage = "Hive table property %s value 'A' is non-numeric";

        for (Map.Entry<String, String> entry : testData.entrySet()) {
            try {
                test(String.format(query, entry.getKey()));
                // Without this, a query that silently succeeds would let the test pass.
                fail("Query is expected to fail because the skip property value is non-numeric");
            } catch (UserRemoteException e) {
                assertThat(e.getMessage(), containsString(String.format(exceptionMessage, entry.getValue())));
            }
        }
    }

    @Test // DRILL-3688
    public void testIgnoreSkipHeaderFooterForRcfile() throws Exception {
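        // Header/footer skipping applies only to text formats; for RCFile, Parquet and SequenceFile
        // the properties are ignored, so all 5000 rows are expected back.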
        testBuilder().sqlQuery("select count(1) as cnt from hive.skipper.kv_rcfile_large").unOrdered()
                .baselineColumns("cnt").baselineValues(5000L).go();
    }

    @Test // DRILL-3688
    public void testIgnoreSkipHeaderFooterForParquet() throws Exception {
        testBuilder().sqlQuery("select count(1) as cnt from hive.skipper.kv_parquet_large").unOrdered()
                .baselineColumns("cnt").baselineValues(5000L).go();
    }

    @Test // DRILL-3688
    public void testIgnoreSkipHeaderFooterForSequencefile() throws Exception {
        testBuilder().sqlQuery("select count(1) as cnt from hive.skipper.kv_sequencefile_large").unOrdered()
                .baselineColumns("cnt").baselineValues(5000L).go();
    }

    @AfterClass
    public static void shutdownOptions() throws Exception {
        test(String.format("alter session set `%s` = false", PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY));
    }
}
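
Assuming a standard Maven Surefire setup (as in the Drill build), the class can be run on its own with:

    mvn test -Dtest=TestHiveStorage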