com.cloudera.cdk.data.hcatalog.impl.Loader.java Source code

Introduction

Here is the source code for com.cloudera.cdk.data.hcatalog.impl.Loader.java
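
This Loader registers hive: repository URI patterns with the CDK URI system. A bare or authority-only URI resolves to a Hive-managed repository, while a URI that carries a path resolves to an external repository rooted at that HDFS location. The sketch below shows how such URIs might be opened once the Loader has been picked up; the metastore host, port, and warehouse path are placeholders, and it assumes the DatasetRepositories.open(String) entry point available in CDK releases of this vintage.

import com.cloudera.cdk.data.DatasetRepositories;
import com.cloudera.cdk.data.DatasetRepository;

public class HiveRepositoryUriExample {
    public static void main(String[] args) {
        // Managed repository: datasets live in the Hive warehouse directory.
        // "metastore-host:9083" is a placeholder for a real metastore address.
        DatasetRepository managed =
                DatasetRepositories.open("repo:hive://metastore-host:9083/");

        // External repository: datasets are stored under the given HDFS path
        // and only their metadata is registered with the metastore.
        DatasetRepository external =
                DatasetRepositories.open("repo:hive://metastore-host:9083/tmp/data");

        System.out.println("Opened: " + managed + " and " + external);
    }
}

The managed and external forms correspond to the ManagedBuilder and ExternalBuilder registrations made in load() below.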

Source

/*
 * Copyright 2013 Cloudera.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.cdk.data.hcatalog.impl;

import com.cloudera.cdk.data.DatasetRepositories;
import com.cloudera.cdk.data.DatasetRepository;
import com.cloudera.cdk.data.DatasetRepositoryException;
import com.cloudera.cdk.data.hcatalog.HCatalogDatasetRepository;
import com.cloudera.cdk.data.impl.Accessor;
import com.cloudera.cdk.data.spi.Loadable;
import com.cloudera.cdk.data.spi.OptionBuilder;
import com.cloudera.cdk.data.spi.URIPattern;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A Loader implementation to register URIs for HCatalogDatasetRepository instances.
 */
public class Loader implements Loadable {

    private static final Logger logger = LoggerFactory.getLogger(Loader.class);

    public static final String HIVE_METASTORE_URI_PROP = "hive.metastore.uris";
    private static final int UNSPECIFIED_PORT = -1;

    /**
     * This class builds configured instances of
     * {@code HCatalogDatasetRepository} with an explicit root directory from a
     * Map of options. It is used by the URI system.
     */
    private static class ExternalBuilder implements OptionBuilder<DatasetRepository> {

        private final Configuration envConf;

        public ExternalBuilder(Configuration envConf) {
            this.envConf = envConf;
        }

        @Override
        public DatasetRepository getFromOptions(Map<String, String> match) {
            logger.debug("External URI options: {}", match);
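            // Resolve the repository root: default to the working directory when
            // no path is given, treat the path as absolute when the "absolute"
            // option is set, and otherwise use it as a relative path.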
            final Path root;
            String path = match.get("path");
            if (path == null || path.isEmpty()) {
                root = new Path(".");
            } else if (match.containsKey("absolute") && Boolean.valueOf(match.get("absolute"))) {
                root = new Path("/", path);
            } else {
                root = new Path(path);
            }
            final FileSystem fs;
            try {
                fs = FileSystem.get(fileSystemURI("hdfs", match), envConf);
            } catch (IOException ex) {
                throw new DatasetRepositoryException("Could not get a FileSystem", ex);
            }

            // make a modifiable copy and set up the MetaStore URI
            Configuration conf = new Configuration(envConf);
            setMetaStoreURI(conf, match);

            return new HCatalogDatasetRepository.Builder().configuration(conf).rootDirectory(fs.makeQualified(root))
                    .build();
        }
    }

    private static class ManagedBuilder implements OptionBuilder<DatasetRepository> {
        private final Configuration envConf;

        public ManagedBuilder(Configuration envConf) {
            this.envConf = envConf;
        }

        @Override
        public DatasetRepository getFromOptions(Map<String, String> match) {
            logger.debug("Managed URI options: {}", match);
            // make a modifiable copy and set up the MetaStore URI
            Configuration conf = new Configuration(envConf);
            setMetaStoreURI(conf, match);
            return new HCatalogDatasetRepository.Builder().configuration(conf).build();
        }
    }

    @Override
    public void load() {
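        // Register the "hive:" URI patterns with the shared Accessor: bare and
        // authority-only URIs resolve to Hive-managed repositories, while URIs
        // that carry a path resolve to external repositories.
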
        // load a default Configuration; it is only used to determine environment defaults
        final Configuration conf = new Configuration();

        String hiveAuthority;
        if (conf.get(HIVE_METASTORE_URI_PROP) != null) {
            try {
                hiveAuthority = new URI(conf.get(HIVE_METASTORE_URI_PROP)).getAuthority();
            } catch (URISyntaxException ex) {
                hiveAuthority = "";
            }
        } else {
            hiveAuthority = "";
        }

        // Hive-managed data sets
        final OptionBuilder<DatasetRepository> managedBuilder = new ManagedBuilder(conf);
        Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive")), managedBuilder);
        Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive://" + hiveAuthority + "/")),
                managedBuilder);

        // external data sets
        final OptionBuilder<DatasetRepository> externalBuilder = new ExternalBuilder(conf);

        String hdfsAuthority;
        try {
            // Use an HDFS URI with no authority and the environment's configuration
            // to find the default HDFS information
            final URI hdfs = FileSystem.get(URI.create("hdfs:/"), conf).getUri();
            hdfsAuthority = "&hdfs-host=" + hdfs.getHost() + "&hdfs-port=" + hdfs.getPort();
        } catch (IOException ex) {
            logger.warn("Could not locate HDFS, hdfs-host and hdfs-port "
                    + "will not be set by default for Hive repositories.");
            hdfsAuthority = "";
        }

        Accessor.getDefault().registerDatasetRepository(
                new URIPattern(URI.create("hive://" + hiveAuthority + "/*path?absolute=true" + hdfsAuthority)),
                externalBuilder);
        Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive:*path")), externalBuilder);
    }

    private static URI fileSystemURI(String scheme, Map<String, String> match) {
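        // Build a FileSystem URI for the given scheme from the optional
        // username/password and hdfs-host/hdfs-port match options; a missing or
        // malformed port falls back to UNSPECIFIED_PORT.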
        final String userInfo;
        if (match.containsKey("username")) {
            if (match.containsKey("password")) {
                userInfo = match.get("username") + ":" + match.get("password");
            } else {
                userInfo = match.get("username");
            }
        } else {
            userInfo = null;
        }
        try {
            int port = UNSPECIFIED_PORT;
            if (match.containsKey("hdfs-port")) {
                try {
                    port = Integer.parseInt(match.get("hdfs-port"));
                } catch (NumberFormatException e) {
                    port = UNSPECIFIED_PORT;
                }
            }
            return new URI(scheme, userInfo, match.get("hdfs-host"), port, "/", null, null);
        } catch (URISyntaxException ex) {
            throw new DatasetRepositoryException("Could not build FS URI", ex);
        }
    }

    /**
     * Sets the MetaStore URI in the given Configuration, if there is a host in
     * the match arguments. If there is no host, then the conf is not changed.
     *
     * @param conf a Configuration that will be used to connect to the MetaStore
     * @param match URIPattern match results
     */
    private static void setMetaStoreURI(Configuration conf, Map<String, String> match) {
        try {
            int port = UNSPECIFIED_PORT;
            if (match.containsKey("port")) {
                try {
                    port = Integer.parseInt(match.get("port"));
                } catch (NumberFormatException e) {
                    port = UNSPECIFIED_PORT;
                }
            }
            // if the host is set, construct a new MetaStore URI (including the
            // port, when one was given) and set the property in the Configuration;
            // otherwise the connection URI is left unchanged.
            if (match.containsKey("host")) {
                conf.set(HIVE_METASTORE_URI_PROP,
                        new URI("thrift", null, match.get("host"), port, "/", null, null).toString());
            }
        } catch (URISyntaxException ex) {
            throw new DatasetRepositoryException("Could not build metastore URI", ex);
        }
    }
}