com.facebook.presto.hive.HdfsConfiguration.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.presto.hive.HdfsConfiguration.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.google.common.collect.ImmutableList;
import com.google.common.net.HostAndPort;
import com.google.common.primitives.Ints;
import io.airlift.units.Duration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.SocksSocketFactory;

import javax.inject.Inject;
import javax.net.SocketFactory;

import java.io.File;
import java.util.List;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.lang.String.format;

/**
 * Builds and hands out the Hadoop {@link Configuration} used for HDFS and S3 access.
 *
 * <p>All settings are copied from a {@link HiveClientConfig} when the object is constructed.
 * A {@code Configuration} is created lazily, once per calling thread, by
 * {@link #createConfiguration()} and is then returned by every call to
 * {@link #getConfiguration(String)} made from that thread.
 *
 * <p>Subclasses may customize the result by overriding {@link #getConfiguration(String)},
 * {@link #createConfiguration()} or {@link #updateConfiguration(Configuration)}.
 */
public class HdfsConfiguration {
    // DFS / RPC client settings
    private final HostAndPort socksProxy;
    private final Duration dfsTimeout;
    private final Duration dfsConnectTimeout;
    private final int dfsConnectMaxRetries;
    private final String domainSocketPath;

    // S3 settings
    private final String s3AwsAccessKey;
    private final String s3AwsSecretKey;
    private final boolean s3SslEnabled;
    private final int s3MaxClientRetries;
    private final int s3MaxErrorRetries;
    private final Duration s3ConnectTimeout;
    private final File s3StagingDirectory;

    // extra Hadoop resource files to load; may be null
    private final List<String> resourcePaths;

    // One Configuration per thread, tied to this instance (hence not static).
    @SuppressWarnings("ThreadLocalNotStaticFinal")
    private final ThreadLocal<Configuration> threadConfiguration = new ThreadLocal<Configuration>() {
        @Override
        protected Configuration initialValue() {
            return createConfiguration();
        }
    };

    /**
     * Captures the HDFS and S3 related settings from the given client configuration.
     *
     * @param hiveClientConfig source of all settings; must not be null
     * @throws NullPointerException if {@code hiveClientConfig} is null
     * @throws IllegalArgumentException if the configured DFS timeout is shorter than 1 ms
     */
    @Inject
    public HdfsConfiguration(HiveClientConfig hiveClientConfig) {
        HiveClientConfig source = checkNotNull(hiveClientConfig, "hiveClientConfig is null");
        long requestedDfsTimeoutMillis = source.getDfsTimeout().toMillis();
        checkArgument(requestedDfsTimeoutMillis >= 1, "dfsTimeout must be at least 1 ms");

        this.socksProxy = source.getMetastoreSocksProxy();
        this.dfsTimeout = source.getDfsTimeout();
        this.dfsConnectTimeout = source.getDfsConnectTimeout();
        this.dfsConnectMaxRetries = source.getDfsConnectMaxRetries();
        this.domainSocketPath = source.getDomainSocketPath();
        this.s3AwsAccessKey = source.getS3AwsAccessKey();
        this.s3AwsSecretKey = source.getS3AwsSecretKey();
        this.s3SslEnabled = source.isS3SslEnabled();
        this.s3MaxClientRetries = source.getS3MaxClientRetries();
        this.s3MaxErrorRetries = source.getS3MaxErrorRetries();
        this.s3ConnectTimeout = source.getS3ConnectTimeout();
        this.s3StagingDirectory = source.getS3StagingDirectory();
        this.resourcePaths = source.getResourceConfigFiles();
    }

    /**
     * Returns the calling thread's Hadoop configuration, creating it on first use.
     *
     * @param host ignored by this implementation; subclasses can use it to provide
     *             per-host configuration
     * @return the configuration bound to the current thread
     */
    @SuppressWarnings("UnusedParameters")
    public Configuration getConfiguration(String host) {
        return threadConfiguration.get();
    }

    /**
     * Creates a fresh Hadoop configuration populated from the captured settings.
     *
     * <p>Settings are applied in this order: resource files, DFS client options, S3 options,
     * and finally {@link #updateConfiguration(Configuration)}.
     *
     * @return the newly built configuration
     */
    protected Configuration createConfiguration() {
        Configuration hadoopConfig = new Configuration();

        addResourceFiles(hadoopConfig);
        configureDfsClient(hadoopConfig);
        configureS3(hadoopConfig);

        // subclasses get the last word
        updateConfiguration(hadoopConfig);
        return hadoopConfig;
    }

    /**
     * Hook invoked at the end of {@link #createConfiguration()}; does nothing by default.
     *
     * @param config the configuration being built, which subclasses may modify in place
     */
    @SuppressWarnings("UnusedParameters")
    protected void updateConfiguration(Configuration config) {
        // intentionally empty: extension point for subclasses
    }

    // Registers every configured resource file, if any were supplied.
    private void addResourceFiles(Configuration target) {
        if (resourcePaths == null) {
            return;
        }
        for (String location : resourcePaths) {
            target.addResource(new Path(location));
        }
    }

    // Applies topology, SOCKS proxy, domain socket, short-circuit and timeout settings.
    private void configureDfsClient(Configuration target) {
        // keeps the dfs client from doing reverse DNS lookups to decide whether nodes are rack local
        target.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class, DNSToSwitchMapping.class);

        if (socksProxy != null) {
            target.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class, SocketFactory.class);
            target.set("hadoop.socks.server", socksProxy.toString());
        }

        if (domainSocketPath != null) {
            target.setStrings("dfs.domain.socket.path", domainSocketPath);
        }

        // short circuit reads are only turned on when a non-blank domain socket path is in effect
        // (the path may come from this class or from a loaded resource file)
        String effectiveSocketPath = target.get("dfs.domain.socket.path", "");
        boolean socketPathConfigured = !effectiveSocketPath.trim().isEmpty();
        if (socketPathConfigured) {
            target.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
        }

        // the same DFS timeout drives both the socket timeout and the IPC ping interval
        int dfsTimeoutMillis = Ints.checkedCast(dfsTimeout.toMillis());
        target.setInt("dfs.socket.timeout", dfsTimeoutMillis);
        target.setInt("ipc.ping.interval", dfsTimeoutMillis);

        int connectTimeoutMillis = Ints.checkedCast(dfsConnectTimeout.toMillis());
        target.setInt("ipc.client.connect.timeout", connectTimeoutMillis);
        target.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);
    }

    // Maps the S3 schemes to their file system classes and applies credentials and S3 options.
    private void configureS3(Configuration target) {
        // re-map filesystem schemes to match Amazon Elastic MapReduce
        String prestoS3FileSystem = PrestoS3FileSystem.class.getName();
        target.set("fs.s3.impl", prestoS3FileSystem);
        target.set("fs.s3n.impl", prestoS3FileSystem);
        target.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

        // AWS credentials are written per scheme, and only the parts that were supplied
        boolean hasAccessKey = s3AwsAccessKey != null;
        boolean hasSecretKey = s3AwsSecretKey != null;
        for (String scheme : ImmutableList.of("s3", "s3bfs", "s3n")) {
            if (hasAccessKey) {
                target.set(format("fs.%s.awsAccessKeyId", scheme), s3AwsAccessKey);
            }
            if (hasSecretKey) {
                target.set(format("fs.%s.awsSecretAccessKey", scheme), s3AwsSecretKey);
            }
        }

        // remaining S3 options
        target.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
        target.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
        target.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
        target.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
        target.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());
    }

    /**
     * {@link DNSToSwitchMapping} that never resolves anything.
     *
     * <p>Installed as the topology mapping implementation so that the dfs client does not
     * perform reverse DNS lookups for rack-locality decisions.
     */
    public static class NoOpDNSToSwitchMapping implements DNSToSwitchMapping {
        /**
         * Always reports the mapping as unknown.
         *
         * @param names host names to resolve; not inspected
         * @return an empty list, which the dfs client takes to mean that the host-to-switch
         *         mapping for the given names is not known
         */
        @Override
        public List<String> resolve(List<String> names) {
            List<String> unknownMapping = ImmutableList.of();
            return unknownMapping;
        }

        /** Does nothing; there are no cached mappings to reload. */
        @Override
        public void reloadCachedMappings() {
            // no-op
        }
    }
}