com.cloudera.recordservice.mr.RecordServiceConfig.java Source code

Introduction

Here is the source code for com.cloudera.recordservice.mr.RecordServiceConfig.java
Source

// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.recordservice.mr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;

import com.cloudera.recordservice.core.NetworkAddress;

/**
 * Config keys and values for the RecordService and utilities to set them.
 *
 * <p>All members are static; the class only groups the configuration property
 * names ({@link ConfVars}), a few defaults, and helpers that write the input
 * specification (query, table or paths) into a Hadoop {@link Configuration}.
 */
public class RecordServiceConfig {
    static {
        // Register recordservice-site.xml as a default resource so that
        // Configuration objects pick up RecordService settings from it.
        Configuration.addDefaultResource("recordservice-site.xml");
    }

    /**
     * All RecordService configuration properties, in the format of:
     * (property name, property description).
     */
    public enum ConfVars {
        QUERY_NAME_CONF("recordservice.query", "The query to generate records from"),
        TBL_NAME_CONF("recordservice.table.name", "The fully qualified table name to read"),
        COL_NAMES_CONF("recordservice.col.names", "The subset of columns to read"),
        PLANNER_HOSTPORTS_CONF("recordservice.planner.hostports",
                "Host/Port of the planner service. Comma separated list."),
        KERBEROS_PRINCIPAL_CONF("recordservice.kerberos.principal", "Kerberos principal"),

        // TODO: It would be nice for the server to adjust this automatically based
        // on how fast the client is able to process records.
        FETCH_SIZE_CONF("recordservice.task.fetch.size",
                "Optional configuration option for performance tuning that configures the"
                        + " max number of records returned when fetching results from the "
                        + " RecordService. If not set, server default will be used."),
        MEM_LIMIT_CONF("recordservice.task.memlimit.bytes",
                "Optional configuration for the maximum memory the server will use per task"),
        RECORDS_LIMIT_CONF("recordservice.task.records.limit",
                "Optional configuration for the maximum number of records returned per task"),
        PLANNER_REQUEST_MAX_TASKS("recordservice.task.plan.maxTasks",
                "Optional configuration for the hinted maximum number of tasks to generate "
                        + "per PlanRequest"),
        PLANNER_RETRY_ATTEMPTS_CONF("recordservice.planner.retry.attempts",
                "Optional configuration for the maximum number of attempts to retry "
                        + "RecordService RPCs with planner"),
        WORKER_RETRY_ATTEMPTS_CONF("recordservice.worker.retry.attempts",
                "Optional configuration for the maximum number of attempts to retry"
                        + " RecordService RPCs with worker."),
        PLANNER_RETRY_SLEEP_MS_CONF("recordservice.planner.retry.sleepMs",
                "Optional configuration for sleep between retry attempts with planner"),
        WORKER_RETRY_SLEEP_MS_CONF("recordservice.worker.retry.sleepMs",
                "Optional configuration for sleep between retry attempts with worker"),
        PLANNER_RPC_TIMEOUT_MS_CONF("recordservice.planner.rpc.timeoutMs",
                "Optional configuration for timeout for planner RPCs"),
        // Description previously said "planner RPCs" (copy-paste error).
        WORKER_RPC_TIMEOUT_MS_CONF("recordservice.worker.rpc.timeoutMs",
                "Optional configuration for timeout for worker RPCs"),
        // Description previously said "worker service" (copy-paste error).
        PLANNER_CONNECTION_TIMEOUT_MS_CONF("recordservice.planner.connection.timeoutMs",
                "Optional configuration for timeout when connecting to the planner service"),
        WORKER_CONNECTION_TIMEOUT_MS_CONF("recordservice.worker.connection.timeoutMs",
                "Optional configuration for timeout when connecting to the worker service"),
        WORKER_ENABLE_SERVER_LOGGING_CONF("recordservice.worker.server.enableLogging",
                "Optional configuration to enable server logging (logging level from log4j)"),
        ZOOKEEPER_CONNECTION_STRING_CONF("recordservice.zookeeper.connectString",
                "ZooKeeper related configurations - these are optional and only used if "
                        + "planner auto discovery is enabled"),
        ZOOKEEPER_CONNECT_TIMEOUTMILLIS_CONF("recordservice.zookeeper.connectTimeoutMillis",
                "ZK connection timeout (in milli seconds)"),
        ZOOKEEPER_ZNODE_CONF("recordservice.zookeeper.znode", "Root zookeeper directory");

        /** The property key as it appears in the configuration. */
        public final String name;
        /** Human-readable description of the property. */
        public final String desc;

        // TODO: more fields (default value, types, etc)
        ConfVars(String name, String desc) {
            this.name = name;
            this.desc = desc;
        }
    }

    // TODO: define getter/setters for ConfVars. Right now users have to explicitly refer
    // to the "name" field. Probably better to hide the detail.

    /**
     * Default planner host:port. The host is taken from the
     * RECORD_SERVICE_PLANNER_HOST environment variable when it is set,
     * otherwise "localhost"; the port is always 12050.
     */
    public static final String DEFAULT_PLANNER_HOSTPORTS = defaultPlannerHostPorts();

    public static final String ZOOKEEPER_ZNODE_DEFAULT = "/recordservice";

    /**
     * Set the request type based on the input string.
     * - starts with "select" (case-insensitive) - assume it is a query
     * - starts with "/" - assume it is a path
     * - otherwise, assume it is a db.table
     *
     * @param config the configuration to update
     * @param input the query, path or fully qualified table name
     * @throws IllegalArgumentException if input is null, or is an empty table name
     * @throws IOException if the path cannot be qualified against its file system
     */
    public static void setInput(Configuration config, String input) throws IOException {
        if (input == null) {
            throw new IllegalArgumentException("'input' must be non-null");
        }
        // Locale.ROOT keeps the keyword check independent of the default locale
        // (e.g. under a Turkish locale "SELECT" would not lower-case to "select").
        if (input.toLowerCase(Locale.ROOT).startsWith("select")) {
            setInputQuery(config, input);
        } else if (input.startsWith("/")) {
            setInputPaths(config, new Path(input));
        } else {
            setInputTable(config, null, input);
        }
    }

    /**
     * Sets the input configuration to read 'cols' from 'db.tbl'. If the tbl is fully
     * qualified, db should be null.
     * If cols is empty, all cols in the table are read.
     *
     * @param conf the configuration to update
     * @param db the database name, or null/empty if tbl is already qualified
     * @param tbl the table name; must be non-empty
     * @param cols the columns to read; may be omitted
     * @throws IllegalArgumentException if tbl is null or empty, or if any column
     *         name is null or empty
     */
    public static void setInputTable(Configuration conf, String db, String tbl, String... cols) {
        // Compare by content: '==' on strings only tests reference identity and
        // lets non-interned empty strings through.
        if (tbl == null || tbl.isEmpty()) {
            throw new IllegalArgumentException("'tbl' must be non-empty");
        }
        if (db != null && !db.isEmpty()) {
            tbl = db + "." + tbl;
        }
        conf.set(ConfVars.TBL_NAME_CONF.name, tbl);
        if (cols != null && cols.length > 0) {
            for (String col : cols) {
                if (col == null || col.isEmpty()) {
                    throw new IllegalArgumentException("Column list cannot contain empty names.");
                }
            }
            conf.setStrings(ConfVars.COL_NAMES_CONF.name, cols);
        }
    }

    /**
     * Sets the input configuration to read results from 'query'.
     *
     * @param conf the configuration to update
     * @param query the query text; must be non-null
     * @throws IllegalArgumentException if query is null
     */
    public static void setInputQuery(Configuration conf, String query) {
        if (query == null) {
            throw new IllegalArgumentException("'query' must be non-null");
        }
        conf.set(ConfVars.QUERY_NAME_CONF.name, query);
    }

    /**
     * Set the array of {@link Path}s as the list of inputs
     * for the map-reduce job.
     *
     * <p>Each path is qualified against its own file system, escaped, and the
     * results are joined with commas into the "mapred.input.dir" property.
     *
     * @param conf the configuration to update
     * @param inputPaths one or more input paths
     * @throws IllegalArgumentException if no path is given
     * @throws IOException if a path's file system cannot be obtained
     */
    public static void setInputPaths(Configuration conf, Path... inputPaths) throws IOException {
        if (inputPaths == null || inputPaths.length == 0) {
            throw new IllegalArgumentException("At least one input path is required");
        }
        StringBuilder dirs = new StringBuilder();
        for (int i = 0; i < inputPaths.length; ++i) {
            if (i > 0) {
                dirs.append(StringUtils.COMMA_STR);
            }
            Path qualified = inputPaths[i].getFileSystem(conf).makeQualified(inputPaths[i]);
            dirs.append(StringUtils.escapeString(qualified.toString()));
        }
        conf.set("mapred.input.dir", dirs.toString());
    }

    /**
     * Parses plannerHostPortsStr into the list of planner addresses it names.
     * Entries are comma separated, each in the form "host:port". Empty entries
     * and whitespace around an entry are ignored.
     *
     * @param plannerHostPortsStr comma separated list of host:port pairs
     * @return every address parsed from the list, in input order; never empty
     * @throws IOException if the list is null or contains no entries, or if an
     *         entry is not of the form host:port with an integer port
     */
    public static List<NetworkAddress> getPlannerHostPort(String plannerHostPortsStr) throws IOException {
        if (plannerHostPortsStr == null) {
            throw new IOException("Invalid planner host port list: " + plannerHostPortsStr);
        }
        List<NetworkAddress> result = new ArrayList<NetworkAddress>();
        for (String entry : plannerHostPortsStr.split(",")) {
            String hostPortStr = entry.trim();
            if (hostPortStr.isEmpty()) {
                continue;
            }
            String[] hostPort = hostPortStr.split(":");
            if (hostPort.length != 2) {
                throw new IOException("Invalid hostport: " + hostPortStr);
            }
            int port;
            try {
                port = Integer.parseInt(hostPort[1]);
            } catch (NumberFormatException e) {
                // Keep the cause so the offending token shows up in the stack trace.
                throw new IOException("Invalid hostport: " + hostPortStr, e);
            }
            result.add(new NetworkAddress(hostPort[0], port));
        }
        // Checked after parsing so that "", "  " and "," are all rejected alike.
        if (result.isEmpty()) {
            throw new IOException("Invalid planner host port list: " + plannerHostPortsStr);
        }
        return result;
    }

    /** Computes {@link #DEFAULT_PLANNER_HOSTPORTS}, reading the environment once. */
    private static String defaultPlannerHostPorts() {
        String host = System.getenv("RECORD_SERVICE_PLANNER_HOST");
        return host != null ? host + ":12050" : "localhost:12050";
    }
}