org.apache.drill.exec.expr.fn.HiveFunctionRegistry.java Source code


Introduction

Here is the source code for org.apache.drill.exec.expr.fn.HiveFunctionRegistry.java, Drill's pluggable registry of Hive UDFs discovered on the classpath.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.expr.fn;

import java.util.HashSet;
import java.util.Set;

import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.expression.FunctionCall;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.common.util.PathScanner;
import org.apache.drill.exec.expr.fn.impl.hive.ObjectInspectorHelper;
import org.apache.drill.exec.planner.sql.DrillOperatorTable;
import org.apache.drill.exec.planner.sql.HiveUDFOperator;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Sets;

public class HiveFunctionRegistry implements PluggableFunctionRegistry {
    static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveFunctionRegistry.class);

    private ArrayListMultimap<String, Class<? extends GenericUDF>> methodsGenericUDF = ArrayListMultimap.create();
    private ArrayListMultimap<String, Class<? extends UDF>> methodsUDF = ArrayListMultimap.create();
    private HashSet<Class<?>> nonDeterministicUDFs = new HashSet<>();

    /**
     * Scans the classpath for implementations of the GenericUDF/UDF interfaces,
     * extracts the function annotations and stores the
     * (function name) --> (implementation class) mappings.
     * @param config Drill configuration
     */
    public HiveFunctionRegistry(DrillConfig config) {
        Set<Class<? extends GenericUDF>> genericUDFClasses = PathScanner.scanForImplementations(GenericUDF.class,
                null);
        for (Class<? extends GenericUDF> clazz : genericUDFClasses) {
            register(clazz, methodsGenericUDF);
        }

        Set<Class<? extends UDF>> udfClasses = PathScanner.scanForImplementations(UDF.class, null);
        for (Class<? extends UDF> clazz : udfClasses) {
            register(clazz, methodsUDF);
        }
    }

    @Override
    public void register(DrillOperatorTable operatorTable) {
        for (String name : Sets.union(methodsGenericUDF.asMap().keySet(), methodsUDF.asMap().keySet())) {
            operatorTable.add(name, new HiveUDFOperator(name.toUpperCase()));
        }
    }

    private <I> void register(Class<? extends I> clazz, ArrayListMultimap<String, Class<? extends I>> methods) {
        Description desc = clazz.getAnnotation(Description.class);
        String[] names;
        if (desc != null) {
            names = desc.name().split(",");
            for (int i = 0; i < names.length; i++) {
                names[i] = names[i].trim();
            }
        } else {
            names = new String[] { clazz.getName().replace('.', '_') };
        }

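        // Remember classes explicitly marked non-deterministic so their holders can be flagged accordingly.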
        UDFType type = clazz.getAnnotation(UDFType.class);
        if (type != null && !type.deterministic()) {
            nonDeterministicUDFs.add(clazz);
        }

        for (int i = 0; i < names.length; i++) {
            methods.put(names[i].toLowerCase(), clazz);
        }
    }

    /**
     * Finds the UDF class for the given function name and checks whether it accepts the given input
     * argument types. If a match is found, creates and returns a holder.
     * @param call function call to resolve
     * @return a matching HiveFuncHolder, or null if no Hive UDF accepts the given argument types
     */
    @Override
    public HiveFuncHolder getFunction(FunctionCall call) {
        HiveFuncHolder h;

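        // First attempt resolution with the argument types as given; if that fails, retry with
        // VARCHAR arguments implicitly converted to VAR16CHAR (see resolveFunction below).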
        h = resolveFunction(call, false);
        if (h != null) {
            return h;
        }

        return resolveFunction(call, true);
    }

    /**
     * Helper method that resolves the given function call to a Hive UDF. When the
     * <i>convertVarCharToVar16Char</i> argument is set, input arguments of type VARCHAR are implicitly cast to
     * VAR16CHAR and the Hive UDF registry is searched using the implicitly cast argument types.
     *
     * TODO: This is a rudimentary function resolver. Need to include more implicit casting such as DECIMAL28 to
     * DECIMAL38 as Hive UDFs can accept only DECIMAL38 type.
     */
    private HiveFuncHolder resolveFunction(FunctionCall call, boolean convertVarCharToVar16Char) {
        HiveFuncHolder holder;
        MajorType[] argTypes = new MajorType[call.args.size()];
        ObjectInspector[] argOIs = new ObjectInspector[call.args.size()];
        for (int i = 0; i < call.args.size(); i++) {
            try {
                argTypes[i] = call.args.get(i).getMajorType();
                if (convertVarCharToVar16Char && argTypes[i].getMinorType() == MinorType.VARCHAR) {
                    argTypes[i] = Types.withMode(MinorType.VAR16CHAR, argTypes[i].getMode());
                }
                argOIs[i] = ObjectInspectorHelper.getDrillObjectInspector(argTypes[i].getMode(),
                        argTypes[i].getMinorType());
            } catch (Exception e) {
                // Hive throws errors for unsupported types; assume there is no Hive UDF that supports the
                // given argument types.
                logger.trace("Failed to find a hive function for given FunctionCall: '{}'", call.toString(), e);
                return null;
            }
        }

        String funcName = call.getName().toLowerCase();

        // search in GenericUDF list
        for (Class<? extends GenericUDF> clazz : methodsGenericUDF.get(funcName)) {
            holder = matchAndCreateGenericUDFHolder(clazz, argTypes, argOIs);
            if (holder != null) {
                return holder;
            }
        }

        // search in UDF list
        for (Class<? extends UDF> clazz : methodsUDF.get(funcName)) {
            holder = matchAndCreateUDFHolder(call.getName(), clazz, argTypes, argOIs);
            if (holder != null) {
                return holder;
            }
        }

        return null;
    }

    private HiveFuncHolder matchAndCreateGenericUDFHolder(Class<? extends GenericUDF> udfClazz,
            MajorType[] argTypes, ObjectInspector[] argOIs) {
        // Probe the UDF to find out whether the argument types are acceptable;
        // if they are, create a holder object.
        try {
            GenericUDF udfInstance = udfClazz.newInstance();
            ObjectInspector returnOI = udfInstance.initialize(argOIs);
            return new HiveFuncHolder(udfClazz, argTypes, returnOI,
                    Types.optional(ObjectInspectorHelper.getDrillType(returnOI)),
                    nonDeterministicUDFs.contains(udfClazz));
        } catch (IllegalAccessException | InstantiationException e) {
            logger.debug("Failed to instantiate class", e);
        } catch (Exception e) {
            // ignore: the UDF does not accept the given argument types
        }

        return null;
    }

    private HiveFuncHolder matchAndCreateUDFHolder(String udfName, Class<? extends UDF> udfClazz,
            MajorType[] argTypes, ObjectInspector[] argOIs) {
        try {
            GenericUDF udfInstance = new GenericUDFBridge(udfName, false/* is operator */, udfClazz.getName());
            ObjectInspector returnOI = udfInstance.initialize(argOIs);

            return new HiveFuncHolder(udfName, udfClazz, argTypes, returnOI,
                    Types.optional(ObjectInspectorHelper.getDrillType(returnOI)),
                    nonDeterministicUDFs.contains(udfClazz));
        } catch (Exception e) {
            // ignore: the UDF does not accept the given argument types
        }

        return null;
    }

}
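
Example

The registry above discovers Hive UDFs purely by scanning the classpath for subclasses of UDF/GenericUDF and reading their annotations. As a rough sketch (the class MyReverse and the names "my_reverse"/"myrev" below are hypothetical, not part of Drill or Hive), a UDF that this registry would pick up could look like the following. register() splits the comma-separated @Description name, trims and lower-cases each entry, so the class is stored under both "my_reverse" and "myrev"; register(DrillOperatorTable) then exposes each name to the planner as an upper-cased HiveUDFOperator.

package org.example.udf; // hypothetical package used only for illustration

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.Text;

// Deterministic, so HiveFunctionRegistry does not add it to nonDeterministicUDFs.
@Description(name = "my_reverse, myrev", value = "_FUNC_(str) - returns str reversed")
@UDFType(deterministic = true)
public class MyReverse extends UDF {
    // Hive resolves evaluate() reflectively via GenericUDFBridge, which is also how
    // matchAndCreateUDFHolder() probes the class against Drill's argument types.
    public Text evaluate(Text input) {
        if (input == null) {
            return null;
        }
        return new Text(new StringBuilder(input.toString()).reverse().toString());
    }
}

A class without a @Description annotation is still registered, under its fully qualified class name with dots replaced by underscores; a class annotated with @UDFType(deterministic = false) additionally lands in nonDeterministicUDFs, and the resulting HiveFuncHolder is marked non-deterministic.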