org.apache.drill.exec.expr.fn.registry.RemoteFunctionRegistry.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.drill.exec.expr.fn.registry.RemoteFunctionRegistry.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.expr.fn.registry;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.apache.drill.common.AutoCloseables;
import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.coord.ClusterCoordinator;
import org.apache.drill.exec.coord.store.TransientStore;
import org.apache.drill.exec.coord.store.TransientStoreConfig;
import org.apache.drill.exec.coord.store.TransientStoreListener;
import org.apache.drill.exec.exception.StoreException;
import org.apache.drill.exec.exception.VersionMismatchException;
import org.apache.drill.exec.proto.SchemaUserBitShared;
import org.apache.drill.exec.proto.UserBitShared.Registry;
import org.apache.drill.exec.store.sys.PersistentStore;
import org.apache.drill.exec.store.sys.PersistentStoreConfig;
import org.apache.drill.exec.store.sys.PersistentStoreProvider;
import org.apache.drill.exec.store.sys.store.DataChangeVersion;
import org.apache.drill.exec.util.ImpersonationUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

import java.io.File;
import java.io.IOException;

import static com.fasterxml.jackson.databind.SerializationFeature.INDENT_OUTPUT;

/**
 * Is responsible for remote function registry management.
 * Creates all remote registry areas at startup and validates them,
 * during init establishes connections with three udf related stores.
 * Provides tools to work with three udf related stores, gives access to remote registry areas.
 *
 * There are three udf stores:
 * REGISTRY - persistent store, stores remote function registry {@link Registry} under udf path
 * which contains information about all dynamically registered jars and their function signatures.
 * If connection is created for the first time, puts empty remote registry.
 *
 * UNREGISTRATION - transient store, stores information under udf/unregister path.
 * udf/unregister path is persistent by itself but any child created will be transient.
 * Whenever user submits request to unregister jar, child path with jar name is created under this store.
 * This store also holds unregistration listener, which notifies all drill bits when child path is created,
 * so they can start local unregistration process.
 *
 * JARS - transient store, stores information under udf/jars path.
 * udf/jars path is persistent by itself but any child created will be transient.
 * Servers as lock, not allowing to perform any action on the same time.
 * There two types of actions: {@link Action#REGISTRATION} and {@link Action#UNREGISTRATION}.
 * Before starting any action, users tries to create child path with jar name under this store
 * and if such path already exists, receives action being performed on that very jar.
 * When user finishes its action, he deletes child path with jar name.
 *
 * There are three udf areas:
 * STAGING - area where user copies binary and source jars before starting registration process.
 * REGISTRY - area where registered jars are stored.
 * TMP - area where source and binary jars are backed up in unique folder during registration process.
 */
public class RemoteFunctionRegistry implements AutoCloseable {

    private static final String registry_path = "registry";
    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(RemoteFunctionRegistry.class);
    private static final ObjectMapper mapper = new ObjectMapper().enable(INDENT_OUTPUT);

    private final TransientStoreListener unregistrationListener;
    private int retryAttempts;
    private FileSystem fs;
    private Path registryArea;
    private Path stagingArea;
    private Path tmpArea;

    private PersistentStore<Registry> registry;
    private TransientStore<String> unregistration;
    private TransientStore<String> jars;

    public RemoteFunctionRegistry(TransientStoreListener unregistrationListener) {
        this.unregistrationListener = unregistrationListener;
    }

    public void init(DrillConfig config, PersistentStoreProvider storeProvider, ClusterCoordinator coordinator) {
        prepareStores(storeProvider, coordinator);
        prepareAreas(config);
        this.retryAttempts = config.getInt(ExecConstants.UDF_RETRY_ATTEMPTS);
    }

    /**
     * Returns current remote function registry version.
     * If remote function registry is not found or unreachable, logs error and returns -1.
     *
     * @return remote function registry version if any, -1 otherwise
     */
    public long getRegistryVersion() {
        DataChangeVersion version = new DataChangeVersion();
        boolean contains = false;
        try {
            contains = registry.contains(registry_path, version);
        } catch (Exception e) {
            logger.error("Problem during trying to access remote function registry [{}]", registry_path, e);
        }
        if (contains) {
            return version.getVersion();
        } else {
            logger.error("Remote function registry [{}] is unreachable", registry_path);
            return -1;
        }
    }

    public Registry getRegistry(DataChangeVersion version) {
        return registry.get(registry_path, version);
    }

    public void updateRegistry(Registry registryContent, DataChangeVersion version)
            throws VersionMismatchException {
        registry.put(registry_path, registryContent, version);
    }

    public void submitForUnregistration(String jar) {
        unregistration.putIfAbsent(jar, jar);
    }

    public void finishUnregistration(String jar) {
        unregistration.remove(jar);
    }

    public String addToJars(String jar, Action action) {
        return jars.putIfAbsent(jar, action.toString());
    }

    public void removeFromJars(String jar) {
        jars.remove(jar);
    }

    public int getRetryAttempts() {
        return retryAttempts;
    }

    public FileSystem getFs() {
        return fs;
    }

    public Path getRegistryArea() {
        return registryArea;
    }

    public Path getStagingArea() {
        return stagingArea;
    }

    public Path getTmpArea() {
        return tmpArea;
    }

    /**
     * Connects to three stores: REGISTRY, UNREGISTRATION, JARS.
     * Puts in REGISTRY store with default instance of remote function registry if store is initiated for the first time.
     * Registers unregistration listener in UNREGISTRATION store.
     */
    private void prepareStores(PersistentStoreProvider storeProvider, ClusterCoordinator coordinator) {
        try {
            PersistentStoreConfig<Registry> registrationConfig = PersistentStoreConfig
                    .newProtoBuilder(SchemaUserBitShared.Registry.WRITE, SchemaUserBitShared.Registry.MERGE)
                    .name("udf").persist().build();
            registry = storeProvider.getOrCreateStore(registrationConfig);
            registry.putIfAbsent(registry_path, Registry.getDefaultInstance());
        } catch (StoreException e) {
            throw new DrillRuntimeException("Failure while loading remote registry.", e);
        }

        TransientStoreConfig<String> unregistrationConfig = TransientStoreConfig
                .newJacksonBuilder(mapper, String.class).name("udf/unregister").build();
        unregistration = coordinator.getOrCreateTransientStore(unregistrationConfig);
        unregistration.addListener(unregistrationListener);

        TransientStoreConfig<String> jarsConfig = TransientStoreConfig.newJacksonBuilder(mapper, String.class)
                .name("udf/jars").build();
        jars = coordinator.getOrCreateTransientStore(jarsConfig);
    }

    /**
     * Creates if absent and validates three udf areas: STAGING, REGISTRY and TMP.
     * Generated udf ares root from {@link ExecConstants#UDF_DIRECTORY_ROOT},
     * if not set, uses user home directory instead.
     */
    private void prepareAreas(DrillConfig config) {
        logger.info("Preparing three remote udf areas: staging, registry and tmp.");
        Configuration conf = new Configuration();
        if (config.hasPath(ExecConstants.UDF_DIRECTORY_FS)) {
            conf.set(FileSystem.FS_DEFAULT_NAME_KEY, config.getString(ExecConstants.UDF_DIRECTORY_FS));
        }

        try {
            this.fs = FileSystem.get(conf);
        } catch (IOException e) {
            DrillRuntimeException.format(e, "Error during file system %s setup",
                    conf.get(FileSystem.FS_DEFAULT_NAME_KEY));
        }

        String root = fs.getHomeDirectory().toUri().getPath();
        if (config.hasPath(ExecConstants.UDF_DIRECTORY_ROOT)) {
            root = config.getString(ExecConstants.UDF_DIRECTORY_ROOT);
        }

        this.registryArea = createArea(fs, root, config.getString(ExecConstants.UDF_DIRECTORY_REGISTRY));
        this.stagingArea = createArea(fs, root, config.getString(ExecConstants.UDF_DIRECTORY_STAGING));
        this.tmpArea = createArea(fs, root, config.getString(ExecConstants.UDF_DIRECTORY_TMP));
    }

    /**
     * Concatenates udf are with root directory.
     * Creates udf area, if area does not exist.
     * Checks if area exists and is directory, if it is writable for current user,
     * throws {@link DrillRuntimeException} otherwise.
     *
     * @param fs file system where area should be created or checked
     * @param root root directory
     * @param directory directory path
     * @return path to area
     */
    private Path createArea(FileSystem fs, String root, String directory) {
        Path path = new Path(new File(root, directory).toURI().getPath());
        String fullPath = path.toUri().getPath();
        try {
            fs.mkdirs(path);
            Preconditions.checkState(fs.exists(path), "Area [%s] must exist", fullPath);
            FileStatus fileStatus = fs.getFileStatus(path);
            Preconditions.checkState(fileStatus.isDirectory(), "Area [%s] must be a directory", fullPath);
            FsPermission permission = fileStatus.getPermission();
            // It is considered that process user has write rights on directory if:
            // 1. process user is owner of the directory and has write rights
            // 2. process user is in group that has write rights
            // 3. any user has write rights
            Preconditions.checkState(
                    (ImpersonationUtil.getProcessUserName().equals(fileStatus.getOwner())
                            && permission.getUserAction().implies(FsAction.WRITE))
                            || (Sets.newHashSet(ImpersonationUtil.getProcessUserGroupNames()).contains(
                                    fileStatus.getGroup()) && permission.getGroupAction().implies(FsAction.WRITE))
                            || permission.getOtherAction().implies(FsAction.WRITE),
                    "Area [%s] must be writable and executable for application user", fullPath);
        } catch (Exception e) {
            if (e instanceof DrillRuntimeException) {
                throw (DrillRuntimeException) e;
            }
            // throws
            DrillRuntimeException.format(e, "Error during udf area creation [%s] on file system [%s]", fullPath,
                    fs.getUri());
        }
        logger.info("Created remote udf area [{}] on file system [{}]", fullPath, fs.getUri());
        return path;
    }

    @Override
    public void close() {
        try {
            AutoCloseables.close(fs, registry, unregistration, jars);
        } catch (Exception e) {
            logger.warn("Failure on close()", e);
        }
    }

    public enum Action {
        REGISTRATION, UNREGISTRATION
    }

}