org.apache.drill.exec.expr.fn.FunctionImplementationRegistry.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.drill.exec.expr.fn.FunctionImplementationRegistry.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.expr.fn;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.JarURLConnection;
import java.net.URL;
import java.net.URLClassLoader;
import java.net.URLConnection;
import java.util.Enumeration;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import com.typesafe.config.ConfigFactory;
import org.apache.commons.io.FileUtils;
import org.apache.drill.common.config.CommonConstants;
import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.expression.FunctionCall;
import org.apache.drill.common.expression.fn.CastFunctions;
import org.apache.drill.common.scanner.ClassPathScanner;
import org.apache.drill.common.scanner.RunTimeScan;
import org.apache.drill.common.scanner.persistence.ScanResult;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.coord.store.TransientStoreEvent;
import org.apache.drill.exec.coord.store.TransientStoreListener;
import org.apache.drill.exec.exception.FunctionValidationException;
import org.apache.drill.exec.exception.JarValidationException;
import org.apache.drill.exec.expr.fn.registry.LocalFunctionRegistry;
import org.apache.drill.exec.expr.fn.registry.JarScan;
import org.apache.drill.exec.expr.fn.registry.RemoteFunctionRegistry;
import org.apache.drill.exec.planner.sql.DrillOperatorTable;
import org.apache.drill.exec.proto.UserBitShared.Jar;
import org.apache.drill.exec.resolver.FunctionResolver;
import org.apache.drill.exec.server.options.OptionManager;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import org.apache.drill.exec.util.JarUtil;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * This class offers the registry for functions. Notably, in addition to Drill's own functions
 * (in {@link LocalFunctionRegistry}), other PluggableFunctionRegistry implementations
 * (e.g., {@link org.apache.drill.exec.expr.fn.HiveFunctionRegistry}) are also registered in this class.
 */
public class FunctionImplementationRegistry implements FunctionLookupContext, AutoCloseable {
    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory
            .getLogger(FunctionImplementationRegistry.class);

    // Registry of Drill functions, built from the classpath scan passed to the constructor.
    private final LocalFunctionRegistry localFunctionRegistry;
    // Remote registry; created in the constructor with an UnregistrationListener.
    private final RemoteFunctionRegistry remoteFunctionRegistry;
    // Local directory that jars from the remote registry area are copied into.
    private final Path localUdfDir;
    // Set to true only when the temporary directory was generated by this class;
    // close() then deletes the whole temporary directory instead of just cleaning localUdfDir.
    private boolean deleteTmpDir = false;
    // Drill temporary directory; assigned while the local udf directory is resolved.
    private File tmpDir;
    // Pluggable registries instantiated in the constructor, in the order they were discovered.
    private List<PluggableFunctionRegistry> pluggableFuncRegistries = Lists.newArrayList();
    // May stay null (constructors without an OptionManager); every use is guarded by a null check.
    private OptionManager optionManager = null;

    /**
     * Creates the registry using a classpath scan obtained from
     * {@link ClassPathScanner#fromPrescan(DrillConfig)}.
     * No option manager is set by this constructor, so the option-dependent paths
     * (dynamic UDF loading, cast function replacement) are skipped.
     *
     * @param config drill config
     */
    @Deprecated
    @VisibleForTesting
    public FunctionImplementationRegistry(DrillConfig config) {
        this(config, ClassPathScanner.fromPrescan(config));
    }

    /**
     * Builds the local function registry from the given classpath scan, instantiates every
     * {@link PluggableFunctionRegistry} implementation found in the scan that exposes a public
     * single-argument {@link DrillConfig} constructor, then creates the remote function registry
     * and resolves the local udf directory.
     *
     * @param config drill config, passed to pluggable registries and used to resolve directories
     * @param classpathScan classpath scan result used to discover functions and pluggable registries
     */
    public FunctionImplementationRegistry(DrillConfig config, ScanResult classpathScan) {
        final Stopwatch timer = Stopwatch.createStarted();

        logger.debug("Generating function registry.");
        localFunctionRegistry = new LocalFunctionRegistry(classpathScan);

        final Set<Class<? extends PluggableFunctionRegistry>> pluggableClasses = classpathScan
                .getImplementations(PluggableFunctionRegistry.class);

        for (Class<? extends PluggableFunctionRegistry> pluggableClass : pluggableClasses) {
            for (Constructor<?> candidate : pluggableClass.getConstructors()) {
                final Class<?>[] argTypes = candidate.getParameterTypes();
                final boolean takesOnlyDrillConfig = argTypes.length == 1 && argTypes[0] == DrillConfig.class;

                if (takesOnlyDrillConfig) {
                    try {
                        pluggableFuncRegistries.add((PluggableFunctionRegistry) candidate.newInstance(config));
                    } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
                            | InvocationTargetException e) {
                        logger.warn("Unable to instantiate PluggableFunctionRegistry class '{}'. Skipping it.",
                                pluggableClass, e);
                    }
                    // Only the first suitable constructor is tried, whether or not it succeeded.
                    break;
                }

                logger.warn(
                        "Skipping PluggableFunctionRegistry constructor {} for class {} since it doesn't implement a "
                                + "[constructor(DrillConfig)]",
                        candidate, pluggableClass);
            }
        }

        logger.info("Function registry loaded.  {} functions loaded in {} ms.", localFunctionRegistry.size(),
                timer.elapsed(TimeUnit.MILLISECONDS));
        this.remoteFunctionRegistry = new RemoteFunctionRegistry(new UnregistrationListener());
        this.localUdfDir = getLocalUdfDir(config);
    }

    /**
     * Creates the registry as the two-argument constructor does and additionally stores
     * the option manager that is consulted for option-dependent behavior
     * (dynamic UDF support, cast function replacement).
     *
     * @param config drill config
     * @param classpathScan classpath scan result
     * @param optionManager option manager to read options from
     */
    public FunctionImplementationRegistry(DrillConfig config, ScanResult classpathScan,
            OptionManager optionManager) {
        this(config, classpathScan);
        this.optionManager = optionManager;
    }

    /**
     * Registers functions in the given operator table: Drill's local functions first,
     * followed by each pluggable function registry in turn.
     *
     * @param operatorTable operator table to register functions in
     */
    public void register(DrillOperatorTable operatorTable) {
        localFunctionRegistry.register(operatorTable);

        // Pluggable registries go after the local one.
        for (PluggableFunctionRegistry pluggable : pluggableFuncRegistries) {
            pluggable.register(operatorTable);
        }
    }

    /**
     * Using the given <code>functionResolver</code>, finds the Drill function implementation for the given
     * <code>functionCall</code>.
     * If no implementation was found and Dynamic UDF Support is enabled,
     * loads all missing remote functions and tries to find the Drill implementation one more time.
     *
     * @param functionResolver resolver used to pick the best match among candidate functions
     * @param functionCall function call to find an implementation for
     * @return matching function holder, or null if the resolver found no match
     */
    @Override
    public DrillFuncHolder findDrillFunction(FunctionResolver functionResolver, FunctionCall functionCall) {
        return findDrillFunction(functionResolver, functionCall, true);
    }

    /**
     * Looks up the best matching function in the local registry. When nothing matches,
     * <code>retry</code> is set and Dynamic UDF Support is enabled, attempts to load remote
     * functions and, if that reports a change, searches once more without further retries.
     *
     * @param functionResolver resolver used to pick the best match
     * @param functionCall function call to find an implementation for
     * @param retry whether a second lookup after loading remote functions is allowed
     * @return matching function holder, or null if none was found
     */
    private DrillFuncHolder findDrillFunction(FunctionResolver functionResolver, FunctionCall functionCall,
            boolean retry) {
        final AtomicLong registryVersion = new AtomicLong();
        final String lookupName = functionReplacement(functionCall);
        final DrillFuncHolder match = functionResolver.getBestMatch(
                localFunctionRegistry.getMethods(lookupName, registryVersion), functionCall);

        if (match != null || !retry) {
            return match;
        }

        final boolean dynamicUdfEnabled = optionManager != null
                && optionManager.getOption(ExecConstants.DYNAMIC_UDF_SUPPORT_ENABLED).bool_val;
        if (dynamicUdfEnabled && loadRemoteFunctions(registryVersion.get())) {
            return findDrillFunction(functionResolver, functionCall, false);
        }
        return match;
    }

    /**
     * Decides whether the function name has to be swapped for a replacement cast function.
     * A replacement is used only when the call has at least one argument, the
     * cast-to-nullable-numeric option is enabled and {@link CastFunctions} reports that
     * a replacement is needed for the first argument's minor type.
     *
     * @param functionCall function call being resolved
     * @return replacement function name if one is needed, the original name otherwise
     */
    private String functionReplacement(FunctionCall functionCall) {
        final String originalName = functionCall.getName();
        if (functionCall.args.isEmpty()) {
            return originalName;
        }

        final MajorType firstArgType = functionCall.args.get(0).getMajorType();
        final DataMode mode = firstArgType.getMode();
        final MinorType minor = firstArgType.getMinorType();

        final boolean castToNullableNumeric = optionManager != null
                && optionManager.getOption(ExecConstants.CAST_TO_NULLABLE_NUMERIC).bool_val;
        if (castToNullableNumeric && CastFunctions.isReplacementNeeded(originalName, minor)) {
            return CastFunctions.getReplacingCastFunction(originalName, mode, minor);
        }
        return originalName;
    }

    /**
     * Finds the Drill function implementation that matches the name, argument types and return type.
     * If no exact implementation was found and Dynamic UDF Support is enabled,
     * loads all missing remote functions and tries to find the Drill implementation one more time.
     *
     * @param name function name
     * @param argTypes argument types the function must match
     * @param returnType return type the function must match
     * @return matching function holder, or null if none was found
     */
    public DrillFuncHolder findExactMatchingDrillFunction(String name, List<MajorType> argTypes,
            MajorType returnType) {
        return findExactMatchingDrillFunction(name, argTypes, returnType, true);
    }

    /**
     * Scans the local registry for a function with the given name whose holder matches the
     * requested return and argument types. When nothing matches, <code>retry</code> is set and
     * Dynamic UDF Support is enabled, attempts to load remote functions and, if that reports
     * a change, searches once more without further retries.
     *
     * @param name function name
     * @param argTypes argument types the function must match
     * @param returnType return type the function must match
     * @param retry whether a second lookup after loading remote functions is allowed
     * @return matching function holder, or null if none was found
     */
    private DrillFuncHolder findExactMatchingDrillFunction(String name, List<MajorType> argTypes,
            MajorType returnType, boolean retry) {
        final AtomicLong registryVersion = new AtomicLong();
        for (DrillFuncHolder candidate : localFunctionRegistry.getMethods(name, registryVersion)) {
            if (candidate.matches(returnType, argTypes)) {
                return candidate;
            }
        }

        if (!retry) {
            return null;
        }

        final boolean dynamicUdfEnabled = optionManager != null
                && optionManager.getOption(ExecConstants.DYNAMIC_UDF_SUPPORT_ENABLED).bool_val;
        if (!dynamicUdfEnabled) {
            return null;
        }

        return loadRemoteFunctions(registryVersion.get())
                ? findExactMatchingDrillFunction(name, argTypes, returnType, false)
                : null;
    }

    /**
     * Finds a function implementation for the given <code>functionCall</code> in the non-Drill
     * (pluggable) function registries, such as the Hive UDF registry.
     *
     * Note: registries are searched in the order in which
     * {@link org.apache.drill.exec.expr.fn.PluggableFunctionRegistry} implementations
     * were added to this registry; the first non-null result wins.
     *
     * @param functionCall function call to find an implementation for
     * @return function holder from the first registry that knows the function, or null if none does
     */
    @Override
    public AbstractFuncHolder findNonDrillFunction(FunctionCall functionCall) {
        for (PluggableFunctionRegistry pluggable : pluggableFuncRegistries) {
            final AbstractFuncHolder match = pluggable.getFunction(functionCall);
            if (match == null) {
                continue;
            }
            return match;
        }

        return null;
    }

    /**
     * Tells whether any local implementation of the named function reports
     * a complex writer as its return value.
     *
     * @param name function name
     * @return true if at least one implementation has a complex-writer return value, false otherwise
     */
    public boolean isFunctionComplexOutput(String name) {
        for (DrillFuncHolder candidate : localFunctionRegistry.getMethods(name)) {
            if (candidate.getReturnValue().isComplexWriter()) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return remote function registry created by this registry's constructor
     */
    public RemoteFunctionRegistry getRemoteFunctionRegistry() {
        return remoteFunctionRegistry;
    }

    /**
     * Validates the jar located at the given local path.
     * A dedicated class loader is created for the jar and closed once validation is finished,
     * which releases the connection opened to the jar.
     * The jar content is scanned and the scan result is validated against the local function registry.
     * A jar that yields no functions is rejected.
     *
     * @param path local path to jar we need to validate
     * @return list of validated function signatures
     * @throws IOException if the class loader or jar resources cannot be read or closed
     */
    public List<String> validate(Path path) throws IOException {
        final URL[] jarUrls = new URL[] { path.toUri().toURL() };
        try (URLClassLoader jarClassLoader = new URLClassLoader(jarUrls)) {
            final List<String> validated = localFunctionRegistry.validate(path.getName(),
                    scan(jarClassLoader, path, jarUrls));
            if (!validated.isEmpty()) {
                return validated;
            }
            throw new FunctionValidationException(
                    String.format("Jar %s does not contain functions", path.getName()));
        }
    }

    /**
     * Attempts to load and register functions from remote function registry.
     * First checks if there are any missing jars.
     * If yes, enters synchronized block to prevent others from loading the same jars
     * and re-checks the list of missing jars in case someone has already loaded them.
     * For each jar that is still missing, copies its binary and source to the local udf area,
     * scans and validates it and prepares a {@link JarScan}.
     * Then registers all prepared jars at the same time and returns true.
     * If copying, scanning or validation of a jar fails, the local copies are deleted,
     * the jar's class loader is closed, the error is logged and processing continues with the next jar.
     *
     * If no jars were registered, checks current local registry version.
     * Returns false if versions match, true otherwise.
     *
     * @param version local function registry version
     * @return true if new jars were registered or local function registry version is different, false otherwise
     */
    public boolean loadRemoteFunctions(long version) {
        List<String> missingJars = getMissingJars(remoteFunctionRegistry, localFunctionRegistry);
        if (!missingJars.isEmpty()) {
            synchronized (this) {
                // Re-read under the lock: another thread may have registered the jars in the meantime.
                missingJars = getMissingJars(remoteFunctionRegistry, localFunctionRegistry);
                List<JarScan> jars = Lists.newArrayList();
                for (String jarName : missingJars) {
                    Path binary = null;
                    Path source = null;
                    URLClassLoader classLoader = null;
                    try {
                        binary = copyJarToLocal(jarName, remoteFunctionRegistry);
                        source = copyJarToLocal(JarUtil.getSourceName(jarName), remoteFunctionRegistry);
                        URL[] urls = { binary.toUri().toURL(), source.toUri().toURL() };
                        classLoader = new URLClassLoader(urls);
                        ScanResult scanResult = scan(classLoader, binary, urls);
                        localFunctionRegistry.validate(jarName, scanResult);
                        // The class loader is handed over to the JarScan and intentionally left open.
                        jars.add(new JarScan(jarName, scanResult, classLoader));
                    } catch (Exception e) {
                        // Best effort clean up for this jar only; remaining jars are still processed.
                        deleteQuietlyLocalJar(binary);
                        deleteQuietlyLocalJar(source);
                        if (classLoader != null) {
                            try {
                                classLoader.close();
                            } catch (Exception ex) {
                                // Report the close failure itself, not the load failure logged below.
                                logger.warn("Problem during closing class loader for {}", jarName, ex);
                            }
                        }
                        logger.error("Problem during remote functions load from {}", jarName, e);
                    }
                }
                if (!jars.isEmpty()) {
                    localFunctionRegistry.register(jars);
                    return true;
                }
            }
        }
        return version != localFunctionRegistry.getVersion();
    }

    /**
     * Looks for the marker file that belongs to the jar at the given path, otherwise throws
     * {@link JarValidationException}.
     * Then scans jar classes according to the configuration parsed from the marker file.
     * After {@link ConfigFactory#parseURL(URL)} the jar file behind the marker file connection is closed.
     * This is extremely important for Windows users where system doesn't allow to delete file if it's being used.
     *
     * @param classLoader unique class loader for jar
     * @param path local path to jar
     * @param urls urls associated with the jar (ex: binary and source)
     * @return scan result of packages, classes, annotations found in jar
     * @throws IOException if marker file resources cannot be listed or opened, or the jar file cannot be closed
     */
    private ScanResult scan(ClassLoader classLoader, Path path, URL[] urls) throws IOException {
        Enumeration<URL> markerFileEnumeration = classLoader
                .getResources(CommonConstants.DRILL_JAR_MARKER_FILE_RESOURCE_PATHNAME);
        while (markerFileEnumeration.hasMoreElements()) {
            URL markerFile = markerFileEnumeration.nextElement();
            // Only accept the marker file whose url points inside the jar being scanned.
            if (markerFile.getPath().contains(path.toUri().getPath())) {
                URLConnection markerFileConnection = null;
                try {
                    markerFileConnection = markerFile.openConnection();
                    DrillConfig drillConfig = DrillConfig.create(ConfigFactory.parseURL(markerFile));
                    return RunTimeScan.dynamicPackageScan(drillConfig, Sets.newHashSet(urls));
                } finally {
                    if (markerFileConnection instanceof JarURLConnection) {
                        // Reuse the connection that was just type-checked instead of opening another one:
                        // a second openConnection() here could throw and mask the original exception.
                        ((JarURLConnection) markerFileConnection).getJarFile().close();
                    }
                }
            }
        }
        throw new JarValidationException(String.format("Marker file %s is missing in %s",
                CommonConstants.DRILL_JAR_MARKER_FILE_RESOURCE_PATHNAME, path.getName()));
    }

    /**
     * Collects names of jars that the remote function registry lists
     * but the local function registry does not know about.
     *
     * @param remoteFunctionRegistry remote function registry
     * @param localFunctionRegistry local function registry
     * @return list of missing jar names, empty if the local registry is up to date
     */
    private List<String> getMissingJars(RemoteFunctionRegistry remoteFunctionRegistry,
            LocalFunctionRegistry localFunctionRegistry) {
        final List<Jar> listedRemotely = remoteFunctionRegistry.getRegistry().getJarList();
        final List<String> knownLocally = localFunctionRegistry.getAllJarNames();
        final List<String> absent = Lists.newArrayList();
        for (Jar remoteJar : listedRemotely) {
            if (knownLocally.contains(remoteJar.getName())) {
                continue;
            }
            absent.add(remoteJar.getName());
        }
        return absent;
    }

    /**
     * Resolves the local udf directory and makes sure it is usable.
     * The directory path is the drill temporary directory joined with ${drill.exec.udf.directory.local};
     * the temporary directory is remembered in {@link #tmpDir} as a side effect.
     * The directory is created if absent, then verified to exist, to be a directory and to be writable.
     * Finally its content is removed, in case jars were left after a previous drillbit run.
     *
     * @param config drill config
     * @return path to local udf directory
     */
    private Path getLocalUdfDir(DrillConfig config) {
        tmpDir = getTmpDir(config);
        final File localUdf = new File(tmpDir, config.getString(ExecConstants.UDF_DIRECTORY_LOCAL));
        // Result is not inspected here: the state checks below report a directory that could not be created.
        localUdf.mkdirs();

        final String location = localUdf.getPath();
        Preconditions.checkState(localUdf.exists(), "Local udf directory [%s] must exist", location);
        Preconditions.checkState(localUdf.isDirectory(), "Local udf directory [%s] must be a directory",
                location);
        Preconditions.checkState(localUdf.canWrite(),
                "Local udf directory [%s] must be writable for application user", location);

        try {
            FileUtils.cleanDirectory(localUdf);
        } catch (IOException cleanUpFailure) {
            throw new DrillRuntimeException("Error during local udf directory clean up", cleanUpFailure);
        }
        return new Path(localUdf.toURI());
    }

    /**
     * Determines the drill temporary directory.
     * The environment variable $DRILL_TMP_DIR takes precedence, then config ${drill.tmp-dir}.
     * When neither supplies a value, a directory is generated with {@link Files#createTempDir()}
     * and {@link #deleteTmpDir} is set to true so that the generated directory is deleted on close.
     *
     * @param config drill config
     * @return drill temporary directory
     */
    private File getTmpDir(DrillConfig config) {
        String location = System.getenv("DRILL_TMP_DIR");
        if (location == null && config.hasPath(ExecConstants.DRILL_TMP_DIR)) {
            location = config.getString(ExecConstants.DRILL_TMP_DIR);
        }

        if (location != null) {
            return new File(location);
        }

        // Nothing configured: generate a directory and remember that this class owns it.
        deleteTmpDir = true;
        return Files.createTempDir();
    }

    /**
     * Copies the named jar from the remote registry area into the local udf directory.
     *
     * @param jarName jar name to be copied
     * @param remoteFunctionRegistry remote function registry
     * @return local path to jar that was copied
     * @throws IOException if the copy fails; the original failure is attached as the cause
     */
    private Path copyJarToLocal(String jarName, RemoteFunctionRegistry remoteFunctionRegistry) throws IOException {
        final Path remoteArea = remoteFunctionRegistry.getRegistryArea();
        final FileSystem remoteFs = remoteFunctionRegistry.getFs();
        final Path from = new Path(remoteArea, jarName);
        final Path to = new Path(localUdfDir, jarName);
        try {
            remoteFs.copyToLocalFile(from, to);
            return to;
        } catch (IOException copyFailure) {
            throw new IOException(
                    String.format("Error during jar [%s] coping from [%s] to [%s]", jarName,
                            remoteArea.toUri().getPath(), localUdfDir.toUri().getPath()),
                    copyFailure);
        }
    }

    /**
     * Quietly deletes the local jar at the given path; a null path is ignored.
     *
     * @param jar path to jar, may be null
     */
    private void deleteQuietlyLocalJar(Path jar) {
        if (jar == null) {
            return;
        }
        final File localFile = new File(jar.toUri().getPath());
        FileUtils.deleteQuietly(localFile);
    }

    /**
     * Releases local directories used by this registry.
     * When {@link #deleteTmpDir} is true the generated temporary directory is deleted quietly.
     * Otherwise only the content of {@link #localUdfDir} is removed; a failure to do so is logged, not thrown.
     */
    @Override
    public void close() {
        if (!deleteTmpDir) {
            final File localUdf = new File(localUdfDir.toUri().getPath());
            try {
                FileUtils.cleanDirectory(localUdf);
            } catch (IOException cleanUpFailure) {
                logger.warn("Problems during local udf directory clean up", cleanUpFailure);
            }
            return;
        }
        FileUtils.deleteQuietly(tmpDir);
    }

    /**
     * Listener that reacts to a jar name being submitted for unregistration.
     * Unregisters the jar from the local function registry
     * and quietly deletes the jar's binary and source from the local udf directory.
     */
    public class UnregistrationListener implements TransientStoreListener {

        /**
         * @param event event whose value is the name of the jar to unregister
         */
        @Override
        public void onChange(TransientStoreEvent event) {
            final String unregisteredJar = (String) event.getValue();
            localFunctionRegistry.unregister(unregisteredJar);

            final String udfDirPath = localUdfDir.toUri().getPath();
            final File binary = new File(udfDirPath, unregisteredJar);
            FileUtils.deleteQuietly(binary);
            final File source = new File(udfDirPath, JarUtil.getSourceName(unregisteredJar));
            FileUtils.deleteQuietly(source);
        }
    }

}