Java tutorial: Apache Drill's HiveStoragePlugin

The code below is the full source of org.apache.drill.exec.store.hive.HiveStoragePlugin, the storage plugin class that connects Drill to a Hive metastore, builds Hive scans, and registers the planner rules used to read Hive tables.
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.hive;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet;
import org.apache.calcite.schema.Schema.TableType;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.drill.common.JSONOptions;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.OptimizerRulesContext;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
import org.apache.drill.exec.planner.sql.logical.ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan;
import org.apache.drill.exec.planner.sql.logical.ConvertHiveParquetScanToDrillParquetScan;
import org.apache.drill.exec.planner.sql.logical.HivePushPartitionFilterIntoScan;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.server.options.SessionOptionManager;
import org.apache.drill.exec.store.AbstractStoragePlugin;
import org.apache.drill.exec.store.SchemaConfig;
import org.apache.drill.exec.store.StoragePluginOptimizerRule;
import org.apache.drill.exec.store.dfs.FormatPlugin;
import org.apache.drill.exec.store.hive.schema.HiveSchemaFactory;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.drill.exec.store.mapr.db.MapRDBFormatPlugin;
import org.apache.drill.exec.store.mapr.db.MapRDBFormatPluginConfig;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.thrift.transport.TTransportException;

public class HiveStoragePlugin extends AbstractStoragePlugin {

  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveStoragePlugin.class);

  public static final String HIVE_MAPRDB_FORMAT_PLUGIN_NAME = "hive-maprdb";

  private final HiveStoragePluginConfig config;
  private HiveSchemaFactory schemaFactory;
  private final HiveConf hiveConf;

  public HiveStoragePlugin(HiveStoragePluginConfig config, DrillbitContext context, String name)
      throws ExecutionSetupException {
    super(context, name);
    this.config = config;
    this.hiveConf = HiveUtilities.generateHiveConf(config.getConfigProps());
    this.schemaFactory = new HiveSchemaFactory(this, name, hiveConf);
  }

  public HiveConf getHiveConf() {
    return hiveConf;
  }

  public HiveStoragePluginConfig getConfig() {
    return config;
  }

  @Override
  public HiveScan getPhysicalScan(String userName, JSONOptions selection, SessionOptionManager options)
      throws IOException {
    return getPhysicalScan(userName, selection, AbstractGroupScan.ALL_COLUMNS, options);
  }

  @Override
  public HiveScan getPhysicalScan(String userName, JSONOptions selection, List<SchemaPath> columns)
      throws IOException {
    return getPhysicalScan(userName, selection, columns, null);
  }

  @Override
  public HiveScan getPhysicalScan(String userName, JSONOptions selection, List<SchemaPath> columns,
      SessionOptionManager options) throws IOException {
    HiveReadEntry hiveReadEntry = selection.getListWith(new ObjectMapper(), new TypeReference<HiveReadEntry>() {});
    try {
      if (hiveReadEntry.getJdbcTableType() == TableType.VIEW) {
        throw new UnsupportedOperationException(
            "Querying views created in Hive from Drill is not supported in current version.");
      }

      // Collect Hive conf properties supplied through the session options, if any.
      Map<String, String> confProperties = new HashMap<>();
      if (options != null) {
        String value = StringEscapeUtils
            .unescapeJava(options.getString(ExecConstants.HIVE_CONF_PROPERTIES));
        logger.trace("[{}] is set to {}.", ExecConstants.HIVE_CONF_PROPERTIES, value);
        try {
          Properties properties = new Properties();
          properties.load(new StringReader(value));
          confProperties = properties.stringPropertyNames().stream()
              .collect(Collectors.toMap(Function.identity(), properties::getProperty, (o, n) -> n));
        } catch (IOException e) {
          logger.warn("Unable to parse Hive conf properties {}, ignoring them.", value);
        }
      }

      return new HiveScan(userName, hiveReadEntry, this, columns, null, confProperties);
    } catch (ExecutionSetupException e) {
      throw new IOException(e);
    }
  }

  // Forced to synchronize this method to allow error recovery
  // in the multi-threaded case. Can remove synchronized only
  // by restructuring connections and cache to allow better
  // recovery from failed secure connections.
  @Override
  public synchronized void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
    try {
      schemaFactory.registerSchemas(schemaConfig, parent);
      return;

      // Hack. We may need to retry the connection. But, we can't because
      // the retry logic is implemented in the very connection we need to
      // discard and rebuild. To work around, we discard the entire schema
      // factory, and all its invalid connections. Very crude, but the
      // easiest short-term solution until we refactor the code to do the
      // job properly. See DRILL-5510.

    } catch (Throwable e) {
      // Unwrap exception
      Throwable ex = e;
      for (;;) {
        // Case for failing on an invalid cached connection
        if (ex instanceof MetaException ||
            // Case for a timed-out impersonated connection, and
            // an invalid non-secure connection used to get security
            // tokens.
            ex instanceof TTransportException) {
          break;
        }

        // All other exceptions are not handled, just pass along up
        // the stack.
        if (ex.getCause() == null || ex.getCause() == ex) {
          logger.error("Hive metastore register schemas failed", e);
          throw new DrillRuntimeException("Unknown Hive error", e);
        }
        ex = ex.getCause();
      }
    }

    // Build a new factory which will cause an all new set of
    // Hive metastore connections to be created.
    try {
      schemaFactory.close();
    } catch (Throwable t) {
      // Ignore, we're in a bad state.
      logger.warn("Schema factory forced close failed, error ignored", t);
    }
    try {
      schemaFactory = new HiveSchemaFactory(this, getName(), hiveConf);
    } catch (ExecutionSetupException e) {
      throw new DrillRuntimeException(e);
    }

    // Try the schemas again. If this fails, just give up.
    schemaFactory.registerSchemas(schemaConfig, parent);
    logger.debug("Successfully recovered from a Hive metastore connection failure.");
  }

  @Override
  public Set<StoragePluginOptimizerRule> getLogicalOptimizerRules(OptimizerRulesContext optimizerContext) {
    final String defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);

    ImmutableSet.Builder<StoragePluginOptimizerRule> ruleBuilder = ImmutableSet.builder();

    ruleBuilder.add(HivePushPartitionFilterIntoScan.getFilterOnProject(optimizerContext, defaultPartitionValue));
    ruleBuilder.add(HivePushPartitionFilterIntoScan.getFilterOnScan(optimizerContext, defaultPartitionValue));

    return ruleBuilder.build();
  }

  @Override
  public Set<StoragePluginOptimizerRule> getPhysicalOptimizerRules(OptimizerRulesContext optimizerRulesContext) {
    ImmutableSet.Builder<StoragePluginOptimizerRule> ruleBuilder = ImmutableSet.builder();
    OptionManager options = optimizerRulesContext.getPlannerSettings().getOptions();
    // TODO: Remove implicit use of the convert_fromTIMESTAMP_IMPALA function
    // once "store.parquet.reader.int96_as_timestamp" is true by default
    if (options.getBoolean(ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)
        || options.getBoolean(ExecConstants.HIVE_OPTIMIZE_PARQUET_SCAN_WITH_NATIVE_READER)) {
      ruleBuilder.add(ConvertHiveParquetScanToDrillParquetScan.INSTANCE);
    }
    if (options.getBoolean(ExecConstants.HIVE_OPTIMIZE_MAPRDB_JSON_SCAN_WITH_NATIVE_READER)) {
      ruleBuilder.add(ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan.INSTANCE);
    }
    return ruleBuilder.build();
  }

  @Override
  public FormatPlugin getFormatPlugin(FormatPluginConfig formatConfig) {
    // TODO: implement formatCreator similar to FileSystemPlugin formatCreator. DRILL-6621
    if (formatConfig instanceof MapRDBFormatPluginConfig) {
      try {
        return new MapRDBFormatPlugin(HIVE_MAPRDB_FORMAT_PLUGIN_NAME, context, hiveConf, config,
            (MapRDBFormatPluginConfig) formatConfig);
      } catch (IOException e) {
        throw new DrillRuntimeException("An error occurred while connecting to MapR-DB", e);
      }
    }
    throw new DrillRuntimeException(String.format("Hive storage plugin doesn't support usage of %s format plugin",
        formatConfig.getClass().getName()));
  }
}
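
The getPhysicalScan override above parses the Hive conf properties carried in a session option by unescaping the string and feeding it to java.util.Properties. The standalone sketch below shows that same parsing idiom in isolation so it can be compiled and run on its own; the class name, method name, and sample property string are illustrative only and are not part of Drill. It assumes commons-lang3 is on the classpath.

import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringEscapeUtils;

// Standalone sketch of the conf-properties parsing used in getPhysicalScan.
// Only the parsing idiom comes from the plugin above; everything else here
// is illustrative.
public class HiveConfPropertiesSketch {

  // Turns an escaped, newline-separated "key=value" string (as stored in the
  // session option) into a plain map, ignoring input that fails to parse.
  static Map<String, String> parseConfProperties(String optionValue) {
    // The option value may carry escaped characters such as "\n".
    String value = StringEscapeUtils.unescapeJava(optionValue);
    try {
      Properties properties = new Properties();
      properties.load(new StringReader(value));
      return properties.stringPropertyNames().stream()
          .collect(Collectors.toMap(Function.identity(), properties::getProperty, (o, n) -> n));
    } catch (IOException e) {
      // Mirror the plugin's behavior: report the problem and ignore the value.
      System.err.println("Unable to parse Hive conf properties: " + value);
      return Collections.emptyMap();
    }
  }

  public static void main(String[] args) {
    // Hypothetical option value: two Hive settings separated by an escaped newline.
    String optionValue = "hive.mapred.supports.subdirectories=true\\nmapred.input.dir.recursive=true";
    System.out.println(parseConfProperties(optionValue));
  }
}

In the plugin itself the resulting map is handed to the HiveScan constructor so per-session Hive settings travel with the scan; the merge function (o, n) -> n simply keeps the last value when a key is repeated.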