org.apache.tajo.engine.function.hiveudf.HiveFunctionLoader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tajo.engine.function.hiveudf.HiveFunctionLoader.java

Source

/***
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.engine.function.hiveudf;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.Writable;
import org.apache.tajo.catalog.FunctionDesc;
import org.apache.tajo.catalog.FunctionDescBuilder;
import org.apache.tajo.catalog.proto.CatalogProtos;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.exception.TajoInternalError;
import org.apache.tajo.exception.UnsupportedDataTypeException;
import org.apache.tajo.function.UDFInvocationDesc;
import org.apache.tajo.plan.util.WritableTypeConverter;
import org.reflections.Reflections;
import org.reflections.util.ConfigurationBuilder;

import java.io.IOException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.*;

/**
 * Discovers Hive UDF classes packaged as jar files in a local directory and converts each of
 * their {@code evaluate} methods into Tajo {@link FunctionDesc} entries.
 */
public class HiveFunctionLoader {
    private static final Log LOG = LogFactory.getLog(HiveFunctionLoader.class);

    /** Name of the method(s) of a Hive UDF class that are registered as functions. */
    private static final String EVALUATE_METHOD = "evaluate";

    /**
     * Scans the directory configured by {@code HIVE_UDF_JAR_DIR} for {@code *.jar} files, finds all
     * subclasses of Hive's {@link UDF} in them and builds function descriptors for them.
     *
     * @param conf configuration providing the UDF jar directory and the local file system
     * @return {@link Optional#empty()} when the directory is not configured or is not a directory;
     *         otherwise the (possibly empty) list of function descriptors that were built
     * @throws TajoInternalError if the directory cannot be inspected because of an I/O failure
     */
    public static Optional<List<FunctionDesc>> loadHiveUDFs(TajoConf conf) {
        List<FunctionDesc> funcList = new ArrayList<>();
        String udfdir = conf.getVar(TajoConf.ConfVars.HIVE_UDF_JAR_DIR);

        // A missing setting cannot be turned into a Path; treat it like a missing directory.
        if (udfdir == null || udfdir.isEmpty()) {
            LOG.warn("Hive UDF directory is not configured");
            return Optional.empty();
        }

        try {
            // Currently Hive udf jar must be on local filesystem
            FileSystem fs = FileSystem.getLocal(conf);
            Path udfPath = new Path(udfdir);

            if (!fs.isDirectory(udfPath)) {
                LOG.warn("Hive UDF directory doesn't exist : " + udfdir);
                return Optional.empty();
            }

            // Read jar paths from the directory and change to URLs; jars whose path cannot be
            // expressed as a URL are skipped instead of leaving null entries in the array.
            URL[] urls = Arrays.stream(fs.listStatus(udfPath, (Path path) -> path.getName().endsWith(".jar")))
                    .map(fstatus -> toJarUrl(fstatus.getPath()))
                    .filter(Objects::nonNull)
                    .toArray(URL[]::new);

            // Nothing to scan: avoid indexing into an empty array below.
            if (urls.length == 0) {
                LOG.warn("No Hive UDF jar found in : " + udfdir);
                return Optional.of(funcList);
            }

            // Extract UDF classes and build function information
            Set<Class<? extends UDF>> udfClasses = getSubclassesFromJarEntry(urls, UDF.class);

            // NOTE(review): every function is registered with the URL of the first jar, even when
            // several jars were scanned -- confirm whether per-class jar paths are required.
            buildFunctionsFromUDF(udfClasses, funcList, "jar:" + urls[0].getPath());

        } catch (IOException e) {
            throw new TajoInternalError(e);
        }

        return Optional.of(funcList);
    }

    /**
     * Converts the path of a jar file into a {@code jar:<file-url>!/} URL.
     *
     * @param jarPath path of the jar file
     * @return the jar URL, or {@code null} (after logging a warning) if the path is not a valid URL
     */
    private static URL toJarUrl(Path jarPath) {
        try {
            return new URL("jar:" + jarPath.toUri().toURL() + "!/");
        } catch (MalformedURLException e) {
            LOG.warn("Skipping Hive UDF jar with malformed URL : " + jarPath, e);
            return null;
        }
    }

    /**
     * Scans the given URLs with Reflections and returns all subtypes of {@code targetCls} found.
     *
     * @param urls      jar URLs to scan; also used as the class path of the class loader
     * @param targetCls super type whose subtypes are looked up
     * @param <T>       the super type
     * @return the subtypes reported by Reflections
     */
    private static <T> Set<Class<? extends T>> getSubclassesFromJarEntry(URL[] urls, Class<T> targetCls) {
        // The class loader is intentionally left open: the returned classes are used after this
        // method returns.
        Reflections refl = new Reflections(
                new ConfigurationBuilder().setUrls(urls).addClassLoader(new URLClassLoader(urls)));

        return refl.getSubTypesOf(targetCls);
    }

    /**
     * Builds function descriptors for every public {@code evaluate} method of the given UDF classes
     * and appends them to {@code list}.
     *
     * <p>Function names come from the comma-separated {@code name} of the class's
     * {@link Description} annotation; if the annotation is absent, the fully qualified class name
     * with {@code '.'} replaced by {@code '_'} is used.
     *
     * @param classes UDF classes to register
     * @param list    output list receiving the built descriptors
     * @param jarurl  jar location stored in each function's invocation descriptor
     */
    static void buildFunctionsFromUDF(Set<Class<? extends UDF>> classes, List<FunctionDesc> list, String jarurl) {
        for (Class<? extends UDF> clazz : classes) {
            String[] names;
            String value = null;
            String extended = null;

            Description desc = clazz.getAnnotation(Description.class);

            // Check @Description annotation (if exists)
            if (desc != null) {
                names = desc.name().split(",");
                for (int i = 0; i < names.length; i++) {
                    names[i] = names[i].trim();
                }

                value = desc.value();
                extended = desc.extended();
            } else {
                names = new String[] { clazz.getName().replace('.', '_') };
            }

            // actual function descriptor building
            FunctionDescBuilder builder = new FunctionDescBuilder();

            // Only an annotation declared directly on the class is taken into account.
            UDFType type = clazz.getDeclaredAnnotation(UDFType.class);
            if (type != null) {
                builder.setDeterministic(type.deterministic());
            }

            builder.setFunctionType(CatalogProtos.FunctionType.UDF);

            if (value != null) {
                builder.setDescription(value);
            }

            if (extended != null) {
                builder.setExample(extended);
            }

            UDFInvocationDesc udfInvocation = new UDFInvocationDesc(CatalogProtos.UDFtype.HIVE, clazz.getName(),
                    jarurl, true);

            // verify 'evaluate' method and extract return type and parameter types;
            // each overload is registered separately.
            for (Method method : clazz.getMethods()) {
                if (EVALUATE_METHOD.equals(method.getName())) {
                    registerMethod(method, names, udfInvocation, builder, list);
                }
            }
        }
    }

    /**
     * Registers one {@code evaluate} method under every given name.
     *
     * <p>If the return type or a parameter type cannot be converted to a Tajo type, an error is
     * logged and nothing is added for this method.
     *
     * @param method        the {@code evaluate} method to register
     * @param names         function names under which the method is registered
     * @param udfInvocation invocation descriptor attached to each built function
     * @param builder       builder pre-populated with the class-level settings
     * @param list          output list receiving the built descriptors
     */
    private static void registerMethod(Method method, String[] names, UDFInvocationDesc udfInvocation,
            FunctionDescBuilder builder, List<FunctionDesc> list) {
        try {
            // Generics are erased at runtime, so this cast itself never fails; the type is
            // checked by the converter below.
            @SuppressWarnings("unchecked")
            Class<? extends Writable> returnClass = (Class<? extends Writable>) method.getReturnType();

            TajoDataTypes.DataType retType = WritableTypeConverter.convertWritableToTajoType(returnClass);
            TajoDataTypes.DataType[] params = convertTajoParameterTypes(method.getParameterTypes());

            builder.setReturnType(retType).setParams(params);

            for (String name : names) {
                builder.setName(name);
                builder.setUDF(udfInvocation);
                list.add(builder.build());
            }
        } catch (UnsupportedDataTypeException e) {
            LOG.error(String.format("Hive UDF '%s' is not registered because of unsupported type: %s", names[0],
                    e.getMessage()));
        }
    }

    /**
     * Converts the parameter classes of a Hive UDF method to Tajo data types.
     *
     * @param hiveUDFparams parameter classes of the method; may be {@code null} or empty
     * @return the converted types in declaration order, or {@code null} when there are no parameters
     * @throws UnsupportedDataTypeException if a parameter type has no Tajo counterpart
     */
    private static TajoDataTypes.DataType[] convertTajoParameterTypes(Class<?>[] hiveUDFparams)
            throws UnsupportedDataTypeException {
        TajoDataTypes.DataType[] params = null;

        // convert types to ones of Tajo
        if (hiveUDFparams != null && hiveUDFparams.length > 0) {
            params = new TajoDataTypes.DataType[hiveUDFparams.length];
            for (int i = 0; i < hiveUDFparams.length; i++) {
                // Erased cast, same as for the return type; the converter validates the class.
                @SuppressWarnings("unchecked")
                Class<? extends Writable> paramClass = (Class<? extends Writable>) hiveUDFparams[i];
                params[i] = WritableTypeConverter.convertWritableToTajoType(paramClass);
            }
        }
        return params;
    }
}