org.apache.drill.exec.store.parquet.TestPushDownAndPruningForVarchar.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.drill.exec.store.parquet.TestPushDownAndPruningForVarchar.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.parquet;

import org.apache.commons.io.FileUtils;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.ClusterFixtureBuilder;
import org.apache.drill.test.ClusterTest;
import org.apache.drill.test.QueryBuilder;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

public class TestPushDownAndPruningForVarchar extends ClusterTest {

    private static File fileStore;

    @BeforeClass
    public static void setup() throws Exception {
        ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher);
        /*
          Contains two data files generated by Drill 1.13.0 version
          (before upgrade to Parquet lib 1.10.0).
          Each file has two varchar columns.
            
          0_0_1.parquet       0_0_2.parquet
          -----------         -----------
          part | val          part | val
          -----------         -----------
          A    | A1           B    | B1
          A    | A2           B    | B2
            
          Also contains .drill.parquet_metadata generated for these two files.
         */
        fileStore = dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "varchar_gen_1_13_0"));
        startCluster(builder);
    }

    @Test
    public void testOldFilesPruningWithAndWithoutMeta() throws Exception {
        String tableNoMeta = createTable("varchar_pruning_old_without_meta", true);
        String tableWithMeta = createTable("varchar_pruning_old_with_meta", false);

        Map<String, String> properties = new HashMap<>();
        properties.put(tableNoMeta, "false");
        properties.put(tableWithMeta, "true");

        try {
            for (Map.Entry<String, String> property : properties.entrySet()) {
                for (String optionValue : Arrays.asList("true", "false", "")) {
                    client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, optionValue);
                    String query = String.format("select * from %s where part = 'A'", property.getKey());
                    String plan = client.queryBuilder().sql(query).explainText();
                    assertTrue(plan.contains("numRowGroups=1"));
                    assertTrue(plan.contains(String.format("usedMetadataFile=%s", property.getValue())));
                    assertFalse(plan.contains("Filter"));

                    client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                            .baselineValues("A", "A1").baselineValues("A", "A2").go();
                }
            }
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);

            properties.keySet().forEach(k -> client.runSqlSilently(String.format("drop table if exists %s", k)));
        }
    }

    @Test
    public void testOldFilesPruningWithNewMeta() throws Exception {
        String table = createTable("varchar_pruning_old_with_new_meta", true);

        try {
            for (String optionValue : Arrays.asList("true", "false", "")) {
                client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, optionValue);
                queryBuilder().sql(String.format("refresh table metadata %s", table)).run();
                String query = String.format("select * from %s where part = 'A'", table);
                String plan = client.queryBuilder().sql(query).explainText();
                assertTrue(plan.contains("numRowGroups=1"));
                assertTrue(plan.contains("usedMetadataFile=true"));
                assertFalse(plan.contains("Filter"));

                client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                        .baselineValues("A", "A1").baselineValues("A", "A2").go();
            }
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
            client.runSqlSilently(String.format("drop table if exists %s", table));
        }
    }

    @Test
    public void testNewFilesPruningNoMeta() throws Exception {
        String oldTable = createTable("varchar_pruning_old_without_meta", true);
        String newTable = "dfs.`tmp`.`varchar_pruning_new_without_meta`";

        try {
            queryBuilder().sql(
                    String.format("create table %s partition by (part) as select * from %s", newTable, oldTable))
                    .run();

            for (String optionValue : Arrays.asList("true", "false", "")) {
                client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, optionValue);
                String query = String.format("select * from %s where part = 'A'", newTable);
                String plan = client.queryBuilder().sql(query).explainText();
                assertTrue(plan.contains("numRowGroups=1"));
                assertTrue(plan.contains("usedMetadataFile=false"));
                assertFalse(plan.contains("Filter"));

                client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                        .baselineValues("A", "A1").baselineValues("A", "A2").go();
            }
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
            client.runSqlSilently(String.format("drop table if exists %s", oldTable));
            client.runSqlSilently(String.format("drop table if exists %s", newTable));
        }
    }

    @Test
    public void testNewFilesPruningWithNewMeta() throws Exception {
        String oldTable = createTable("varchar_pruning_old_without_meta", true);
        String newTable = "dfs.`tmp`.`varchar_pruning_new_with_new_meta`";

        try {
            queryBuilder().sql(
                    String.format("create table %s partition by (part) as select * from %s", newTable, oldTable))
                    .run();
            queryBuilder().sql(String.format("refresh table metadata %s", newTable)).run();

            for (String optionValue : Arrays.asList("true", "false", "")) {
                client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, optionValue);
                String query = String.format("select * from %s where part = 'A'", newTable);
                String plan = client.queryBuilder().sql(query).explainText();
                assertTrue(plan.contains("numRowGroups=1"));
                assertTrue(plan.contains("usedMetadataFile=true"));
                assertFalse(plan.contains("Filter"));

                client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                        .baselineValues("A", "A1").baselineValues("A", "A2").go();
            }
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
            client.runSqlSilently(String.format("drop table if exists %s", oldTable));
            client.runSqlSilently(String.format("drop table if exists %s", newTable));
        }
    }

    @Ignore("Statistics for VARCHAR that has all nulls is not available (PARQUET-1341). Requires upgrade to Parquet 1.11.0.")
    @Test
    public void testNewFilesPruningWithNullPartition() throws Exception {
        String table = "dfs.`tmp`.`varchar_pruning_new_with_null_partition`";

        try {
            queryBuilder().sql(String.format("create table %s partition by (col_vrchr) as "
                    + "select * from cp.`parquet/alltypes_optional.parquet`", table)).run();

            String query = String.format("select * from %s where col_vrchr = 'Nancy Cloke'", table);

            String plan = client.queryBuilder().sql(query).explainText();
            assertTrue(plan.contains("usedMetadataFile=false"));
            assertFalse(plan.contains("Filter"));

            QueryBuilder.QuerySummary result = client.queryBuilder().sql(query).run();
            assertTrue(result.succeeded());
            assertEquals(1, result.recordCount());

            queryBuilder().sql(String.format("refresh table metadata %s", table)).run();

            plan = client.queryBuilder().sql(query).explainText();
            assertTrue(plan.contains("usedMetadataFile=true"));
            assertFalse(plan.contains("Filter"));

            result = client.queryBuilder().sql(query).run();
            assertTrue(result.succeeded());
            assertEquals(1, result.recordCount());
        } finally {
            client.runSqlSilently(String.format("drop table if exists %s", table));
        }
    }

    @Test
    public void testOldFilesPushDownNoMeta() throws Exception {
        String table = createTable("varchar_push_down_old_without_meta", true);

        Map<String, String> properties = new HashMap<>();
        properties.put("true", "numRowGroups=1");
        properties.put("false", "numRowGroups=2");

        try {
            for (Map.Entry<String, String> property : properties.entrySet()) {
                client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, property.getKey());
                String query = String.format("select * from %s where val = 'A1'", table);

                String plan = client.queryBuilder().sql(query).explainText();
                assertTrue(plan.contains(property.getValue()));
                assertTrue(plan.contains("usedMetadataFile=false"));

                client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                        .baselineValues("A", "A1").go();
            }
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
            client.runSqlSilently(String.format("drop table if exists %s", table));
        }
    }

    @Test
    public void testOldFilesPushDownWithOldMeta() throws Exception {
        String table = createTable("varchar_push_down_old_with_old_meta", false);

        Map<String, String> properties = new HashMap<>();
        properties.put("false", "numRowGroups=2");
        properties.put("true", "numRowGroups=1");

        try {
            for (Map.Entry<String, String> property : properties.entrySet()) {
                client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, property.getKey());
                String query = String.format("select * from %s where val = 'A1'", table);

                String plan = client.queryBuilder().sql(query).explainText();
                assertTrue(plan.contains(property.getValue()));
                assertTrue(plan.contains("usedMetadataFile=true"));

                client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                        .baselineValues("A", "A1").go();
            }
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
            client.runSqlSilently(String.format("drop table if exists %s", table));
        }
    }

    @Test
    public void testNewFilesPushDownNoMeta() throws Exception {
        String oldTable = createTable("varchar_push_down_old_without_meta", true);
        String newTable = "dfs.`tmp`.`varchar_push_down_new_without_meta`";

        try {
            queryBuilder().sql(
                    String.format("create table %s partition by (part) as select * from %s", newTable, oldTable))
                    .run();

            for (String optionValue : Arrays.asList("true", "false", "")) {
                client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, optionValue);
                String query = String.format("select * from %s where val = 'A1'", newTable);
                String plan = client.queryBuilder().sql(query).explainText();
                assertTrue(plan.contains("numRowGroups=1"));
                assertTrue(plan.contains("usedMetadataFile=false"));

                client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                        .baselineValues("A", "A1").go();
            }
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
            client.runSqlSilently(String.format("drop table if exists %s", oldTable));
            client.runSqlSilently(String.format("drop table if exists %s", newTable));
        }
    }

    @Test
    public void testNewFilesPushDownWithMeta() throws Exception {
        String oldTable = createTable("varchar_push_down_old_without_meta", true);
        String newTable = "dfs.`tmp`.`varchar_push_down_new_with_meta`";

        try {
            queryBuilder().sql(
                    String.format("create table %s partition by (part) as select * from %s", newTable, oldTable))
                    .run();
            queryBuilder().sql(String.format("refresh table metadata %s", newTable)).run();
            String query = String.format("select * from %s where val = 'A1'", newTable);
            // metadata for binary is allowed only after Drill 1.15.0
            // set string signed option to true, to read it on current Drill 1.15.0-SNAPSHOT version
            client.alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, "true");
            String plan = client.queryBuilder().sql(query).explainText();
            assertTrue(plan.contains("numRowGroups=1"));
            assertTrue(plan.contains("usedMetadataFile=true"));

            client.testBuilder().sqlQuery(query).unOrdered().baselineColumns("part", "val")
                    .baselineValues("A", "A1").go();
        } finally {
            client.resetSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
            client.runSqlSilently(String.format("drop table if exists %s", oldTable));
            client.runSqlSilently(String.format("drop table if exists %s", newTable));
        }
    }

    private String createTable(String tableName, boolean removeMetadata) throws IOException {
        File rootDir = dirTestWatcher.getRootDir();
        File table = new File(rootDir, String.format("%s_%s", tableName, UUID.randomUUID()));
        FileUtils.copyDirectory(fileStore, table);
        File metadata = new File(table, ".drill.parquet_metadata");
        if (removeMetadata) {
            assertTrue(metadata.delete());
        } else {
            // metadata modification time should be higher
            // than directory modification time otherwise metadata file will be regenerated
            assertTrue(metadata.setLastModified(Instant.now().toEpochMilli()));
        }
        return String.format("dfs.`root`.`%s`", table.getName());
    }

}