org.apache.hadoop.hive.service.HiveServer.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.service.HiveServer.java, the
original Thrift-based Hive server: it exposes HiveQL execution and result
fetching over the ThriftHive interface and embeds the metastore handler.

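A minimal remote-client sketch is shown below; it assumes the generated
ThriftHive/HiveClient classes and libthrift are on the classpath, and the
host, port, and query string are placeholders:

    import org.apache.hadoop.hive.service.HiveClient;
    import org.apache.thrift.protocol.TBinaryProtocol;
    import org.apache.thrift.transport.TSocket;
    import org.apache.thrift.transport.TTransport;

    public class HiveServerClientExample {
        public static void main(String[] args) throws Exception {
            // connect to a HiveServer listening on the default port
            TTransport transport = new TSocket("localhost", 10000);
            transport.open();
            HiveClient client = new HiveClient(new TBinaryProtocol(transport));

            client.execute("SHOW TABLES"); // any HiveQL statement
            for (String row : client.fetchAll()) {
                System.out.println(row);
            }
            client.clean();
            transport.close();
        }
    }
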
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.ServerUtils;
import org.apache.hadoop.hive.common.LogUtils;
import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
import org.apache.hadoop.hive.common.cli.CommonCliOptions;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStore;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.plan.api.QueryPlan;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.thrift.TException;
import org.apache.thrift.TProcessor;
import org.apache.thrift.TProcessorFactory;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.server.TServer;
import org.apache.thrift.server.TThreadPoolServer;
import org.apache.thrift.transport.TServerSocket;
import org.apache.thrift.transport.TServerTransport;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.facebook.fb303.fb_status;

/**
 * Thrift Hive Server Implementation.
 */
public class HiveServer extends ThriftHive {
    private static final String VERSION = "1";

    /**
     * default port on which to start the Hive server
     */
    private static final int DEFAULT_HIVE_SERVER_PORT = 10000;

    /**
     * default minimum number of threads serving the Hive server
     */
    private static final int DEFAULT_MIN_WORKER_THREADS = 100;

    /**
     * default maximum number of threads serving the Hive server
     */
    private static final int DEFAULT_MAX_WORKER_THREADS = Integer.MAX_VALUE;

    /**
     * Handler which implements the Hive interface. This class can be used in
     * lieu of the HiveClient class to get an embedded server.
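     *
     * <p>A minimal embedded-usage sketch (assumes a reachable metastore and a
     * writable scratch directory; the statement shown is only illustrative):</p>
     *
     * <pre>
     * HiveInterface hive = new HiveServerHandler();
     * hive.execute("SHOW TABLES");
     * for (String row : hive.fetchAll()) {
     *     System.out.println(row);
     * }
     * hive.clean();
     * </pre>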
     */
    public static class HiveServerHandler extends HiveMetaStore.HMSHandler implements HiveInterface {
        /**
         * The org.apache.hadoop.hive.ql.Driver instance used for the run() and
         * getResults() calls. It backs the most recently executed Hive query.
         */
        private Driver driver;
        private CommandProcessorResponse response;
        /**
         * Processors other than the Driver (non-Hive commands) write their output
         * to session.out (a temp file) first; the fetchOne/fetchN/fetchAll
         * functions then read the output back through pipeIn.
         */
        private BufferedReader pipeIn;

        /**
         * Flag that indicates whether the last executed command was a Hive query.
         */
        private boolean isHiveQuery;

        public static final Log LOG = LogFactory.getLog(HiveServer.class.getName());

        /**
         * Construct a new handler.
         *
         * @throws MetaException unable to create metastore
         */
        public HiveServerHandler() throws MetaException {
            this(new HiveConf(SessionState.class));
        }

        /**
         * Construct a new handler with the specified hive configuration.
         *
         * @param conf caller specified hive configuration
         * @throws MetaException unable to create metastore
         */
        public HiveServerHandler(HiveConf conf) throws MetaException {
            super(HiveServer.class.getName(), conf);

            isHiveQuery = false;
            driver = null;
            SessionState session = new SessionState(conf);
            SessionState.start(session);
            setupSessionIO(session);
        }

        private void setupSessionIO(SessionState session) {
            try {
                LOG.info("Putting temp output to file " + session.getTmpOutputFile().toString());
                session.in = null; // hive server's session input stream is not used
                // open a per-session file in auto-flush mode for writing temp results
                session.out = new PrintStream(new FileOutputStream(session.getTmpOutputFile()), true, "UTF-8");
                // TODO: for hadoop jobs, progress is printed out to session.err,
                // we should find a way to feed back job progress to client
                session.err = new PrintStream(System.err, true, "UTF-8");
            } catch (IOException e) {
                LOG.error("Error in creating temp output file ", e);
                try {
                    session.in = null;
                    session.out = new PrintStream(System.out, true, "UTF-8");
                    session.err = new PrintStream(System.err, true, "UTF-8");
                } catch (UnsupportedEncodingException ee) {
                    ee.printStackTrace();
                    session.out = null;
                    session.err = null;
                }
            }
        }

        /**
         * Executes a query.
         *
         * @param cmd
         *          HiveQL query to execute
         */
        public void execute(String cmd) throws HiveServerException, TException {
            HiveServerHandler.LOG.info("Running the query: " + cmd);
            SessionState session = SessionState.get();

            String cmd_trimmed = cmd.trim();
            String[] tokens = cmd_trimmed.split("\\s");
            String cmd_1 = cmd_trimmed.substring(tokens[0].length()).trim();

            int ret = 0;
            String errorMessage = "";
            String SQLState = null;

            try {
                CommandProcessor proc = CommandProcessorFactory.get(tokens[0]);
                if (proc != null) {
                    if (proc instanceof Driver) {
                        isHiveQuery = true;
                        driver = (Driver) proc;
                        // In Hive server mode, we are not able to retry in the FetchTask
                        // case when calling fetch queries, since execute() has returned.
                        // For now, we disable the retry attempts.
                        driver.setTryCount(Integer.MAX_VALUE);
                        response = driver.run(cmd);
                    } else {
                        isHiveQuery = false;
                        driver = null;
                        // need to reset output for each non-Hive query
                        setupSessionIO(session);
                        response = proc.run(cmd_1);
                    }

                    ret = response.getResponseCode();
                    SQLState = response.getSQLState();
                    errorMessage = response.getErrorMessage();
                }
            } catch (Exception e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage("Error running query: " + e.toString());
                ex.setErrorCode(ret == 0 ? -10000 : ret);
                throw ex;
            }

            if (ret != 0) {
                throw new HiveServerException("Query returned non-zero code: " + ret + ", cause: " + errorMessage,
                        ret, SQLState);
            }
        }

        /**
         * Should be called by the client at the end of a session.
         */
        public void clean() {
            if (driver != null) {
                driver.close();
                driver.destroy();
            }

            SessionState session = SessionState.get();
            if (session.getTmpOutputFile() != null) {
                session.getTmpOutputFile().delete();
            }
            pipeIn = null;
        }

        /**
         * Return the status information about the Map-Reduce cluster.
         */
        public HiveClusterStatus getClusterStatus() throws HiveServerException, TException {
            HiveClusterStatus hcs;
            try {
                Driver drv = new Driver();
                drv.init();

                ClusterStatus cs = drv.getClusterStatus();
                JobTrackerState state = JobTrackerState
                        .valueOf(ShimLoader.getHadoopShims().getJobTrackerState(cs).name());

                hcs = new HiveClusterStatus(cs.getTaskTrackers(), cs.getMapTasks(), cs.getReduceTasks(),
                        cs.getMaxMapTasks(), cs.getMaxReduceTasks(), state);
            } catch (Exception e) {
                LOG.error(e.toString());
                e.printStackTrace();
                HiveServerException ex = new HiveServerException();
                ex.setMessage("Unable to get cluster status: " + e.toString());
                throw ex;
            }
            return hcs;
        }

        /**
         * Return the Hive schema of the query result.
         */
        public Schema getSchema() throws HiveServerException, TException {
            if (!isHiveQuery) {
                Schema schema = response.getSchema();
                if (schema == null) {
                    // Return empty schema if the last command was not a Hive query
                    return new Schema();
                } else {
                    return schema;
                }
            }

            assert driver != null : "getSchema() is called on a Hive query and driver is NULL.";

            try {
                Schema schema = driver.getSchema();
                if (schema == null) {
                    schema = new Schema();
                }
                LOG.info("Returning schema: " + schema);
                return schema;
            } catch (Exception e) {
                LOG.error(e.toString());
                e.printStackTrace();
                HiveServerException ex = new HiveServerException();
                ex.setMessage("Unable to get schema: " + e.toString());
                throw ex;
            }
        }

        /**
         * Return the Thrift schema of the query result.
         */
        public Schema getThriftSchema() throws HiveServerException, TException {
            if (!isHiveQuery) {
                // Return empty schema if the last command was not a Hive query
                return new Schema();
            }

            assert driver != null : "getThriftSchema() is called on a Hive query and driver is NULL.";

            try {
                Schema schema = driver.getThriftSchema();
                if (schema == null) {
                    schema = new Schema();
                }
                LOG.info("Returning schema: " + schema);
                return schema;
            } catch (Exception e) {
                LOG.error(e.toString());
                e.printStackTrace();
                HiveServerException ex = new HiveServerException();
                ex.setMessage("Unable to get schema: " + e.toString());
                throw ex;
            }
        }

        /**
         * Fetches the next row in a query result set.
         *
         * @return the next row in a query result set. Note that null is never
         *         returned, because Thrift cannot transport null; end-of-data is
         *         signaled by a HiveServerException with message "OK" and error
         *         code 0.
         */
        public String fetchOne() throws HiveServerException, TException {
            if (!isHiveQuery) {
                // For non-Hive commands, read one line back from the temp output file
                List<String> results = new ArrayList<String>(1);
                readResults(results, 1);
                if (results.size() > 0) {
                    return results.get(0);
                } else { //  throw an EOF exception
                    throw new HiveServerException("OK", 0, "");
                }
            }

            assert driver != null : "fetchOne() is called on a Hive query and driver is NULL.";

            ArrayList<String> result = new ArrayList<String>();
            driver.setMaxRows(1);
            try {
                if (driver.getResults(result)) {
                    return result.get(0);
                }
                // TODO: Cannot return null here because thrift cannot handle nulls
                // TODO: Returning empty string for now. Need to figure out how to
                // TODO: return null in some other way
                throw new HiveServerException("OK", 0, "");
                // return "";
            } catch (CommandNeedRetryException e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage(e.getMessage());
                throw ex;
            } catch (IOException e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage(e.getMessage());
                throw ex;
            }
        }
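
        // Client-side consumption sketch (illustrative only; "client" and
        // "process" are hypothetical names): since end-of-data is signaled by
        // a HiveServerException whose message is "OK" and whose error code is
        // 0, callers typically loop like this:
        //
        //   try {
        //       while (true) {
        //           process(client.fetchOne());
        //       }
        //   } catch (HiveServerException e) {
        //       if (e.getErrorCode() != 0) {
        //           throw e; // a real failure, not end-of-data
        //       }
        //   }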

        private void cleanTmpFile() {
            if (pipeIn != null) {
                SessionState session = SessionState.get();
                File tmp = session.getTmpOutputFile();
                try {
                    // release the reader before deleting the file it reads from
                    pipeIn.close();
                } catch (IOException e) {
                    LOG.warn("Error closing temp result reader ", e);
                }
                tmp.delete();
                pipeIn = null;
            }
        }

        /**
         * Reads the temporary results of non-Hive (non-Driver) commands into the
         * given list of strings.
         * @param results list to which the result lines are appended
         * @param nLines number of lines to read at once; if <= 0, read all lines
         */
        private void readResults(List<String> results, int nLines) {

            if (pipeIn == null) {
                SessionState session = SessionState.get();
                File tmp = session.getTmpOutputFile();
                try {
                    pipeIn = new BufferedReader(new FileReader(tmp));
                } catch (FileNotFoundException e) {
                    LOG.error("File " + tmp + " not found. ", e);
                    return;
                }
            }

            boolean readAll = false;

            for (int i = 0; i < nLines || nLines <= 0; ++i) {
                try {
                    String line = pipeIn.readLine();
                    if (line == null) {
                        // reached the end of the result file
                        readAll = true;
                        break;
                    } else {
                        results.add(line);
                    }
                } catch (IOException e) {
                    LOG.error("Reading temp results encountered an exception: ", e);
                    readAll = true;
                }
            }
            if (readAll) {
                cleanTmpFile();
            }
        }

        /**
         * Fetches numRows rows.
         *
         * @param numRows
         *          Number of rows to fetch.
         * @return A list of rows. The size of the list is numRows if there are at
         *         least numRows rows available to return. The size is smaller than
         *         numRows if there aren't enough rows. The list will be empty if
         *         there is no more row to fetch or numRows == 0.
         * @throws HiveServerException
         *           Invalid value for numRows (numRows < 0)
         */
        public List<String> fetchN(int numRows) throws HiveServerException, TException {
            if (numRows < 0) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage("Invalid argument for number of rows: " + numRows);
                throw ex;
            }

            ArrayList<String> result = new ArrayList<String>();

            if (!isHiveQuery) {
                readResults(result, numRows);
                return result;
            }

            assert driver != null : "fetchN() is called on a Hive query and driver is NULL.";

            driver.setMaxRows(numRows);
            try {
                driver.getResults(result);
            } catch (CommandNeedRetryException e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage(e.getMessage());
                throw ex;
            } catch (IOException e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage(e.getMessage());
                throw ex;
            }
            return result;
        }
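
        // Paging sketch (illustrative only; "client" is a hypothetical remote
        // stub): fetchN() returns fewer than numRows rows only when the result
        // set is exhausted, so callers can page through large results like this:
        //
        //   List<String> batch;
        //   do {
        //       batch = client.fetchN(1000);
        //       // ... process the batch ...
        //   } while (batch.size() == 1000);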

        /**
         * Fetches all the rows in a result set.
         *
         * @return All the rows in the result set of a query executed via the
         *         execute method.
         *
         *         TODO: Currently the server buffers all the rows before returning
         *         them to the client. Decide whether the buffering should be done
         *         in the client.
         */
        public List<String> fetchAll() throws HiveServerException, TException {

            ArrayList<String> rows = new ArrayList<String>();
            ArrayList<String> result = new ArrayList<String>();

            if (!isHiveQuery) {
                // nLines <= 0 makes readResults read all remaining lines
                readResults(result, 0);
                return result;
            }

            assert driver != null : "fetchAll() is called on a Hive query and driver is NULL.";

            try {
                while (driver.getResults(result)) {
                    rows.addAll(result);
                    result.clear();
                }
            } catch (CommandNeedRetryException e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage(e.getMessage());
                throw ex;
            } catch (IOException e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage(e.getMessage());
                throw ex;
            }
            return rows;
        }

        /**
         * Return the status of the server.
         */
        @Override
        public fb_status getStatus() {
            return fb_status.ALIVE;
        }

        /**
         * Return the version of the server software.
         */
        @Override
        public String getVersion() {
            return VERSION;
        }

        @Override
        public QueryPlan getQueryPlan() throws HiveServerException, TException {
            QueryPlan qp = new QueryPlan();

            if (!isHiveQuery) {
                return qp;
            }

            assert driver != null : "getQueryPlan() is called on a Hive query and driver is NULL.";

            // TODO for now only return one query at a time
            // going forward, all queries associated with a single statement
            // will be returned in a single QueryPlan
            try {
                qp.addToQueries(driver.getQueryPlan());
            } catch (Exception e) {
                HiveServerException ex = new HiveServerException();
                ex.setMessage(e.toString());
                throw ex;
            }
            return qp;
        }

    }

    /**
     * ThriftHiveProcessorFactory: creates a new HiveServerHandler, and hence a
     * new session, for every client connection.
     */
    public static class ThriftHiveProcessorFactory extends TProcessorFactory {
        private final HiveConf conf;

        public ThriftHiveProcessorFactory(TProcessor processor, HiveConf conf) {
            super(processor);
            this.conf = conf;
        }

        @Override
        public TProcessor getProcessor(TTransport trans) {
            try {
                Iface handler = new HiveServerHandler(new HiveConf(conf));
                return new ThriftHive.Processor(handler);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * HiveServer-specific CLI: parses the port and worker-thread options, with
     * fallbacks to the deprecated positional syntax and the HIVE_PORT
     * environment variable.
     */
    static public class HiveServerCli extends CommonCliOptions {
        private static final String OPTION_MAX_WORKER_THREADS = "maxWorkerThreads";
        private static final String OPTION_MIN_WORKER_THREADS = "minWorkerThreads";

        public int port = DEFAULT_HIVE_SERVER_PORT;
        public int minWorkerThreads = DEFAULT_MIN_WORKER_THREADS;
        public int maxWorkerThreads = DEFAULT_MAX_WORKER_THREADS;

        @SuppressWarnings("static-access")
        public HiveServerCli() {
            super("hiveserver", true);

            // -p port
            OPTIONS.addOption(OptionBuilder.hasArg().withArgName("port")
                    .withDescription("Hive Server port number, default:" + DEFAULT_HIVE_SERVER_PORT).create('p'));

            // min worker thread count
            OPTIONS.addOption(OptionBuilder.hasArg().withLongOpt(OPTION_MIN_WORKER_THREADS)
                    .withDescription("minimum number of worker threads, default:" + DEFAULT_MIN_WORKER_THREADS)
                    .create());

            // max worker thread count
            OPTIONS.addOption(OptionBuilder.hasArg().withLongOpt(OPTION_MAX_WORKER_THREADS)
                    .withDescription("maximum number of worker threads, default:" + DEFAULT_MAX_WORKER_THREADS)
                    .create());
        }

        @Override
        public void parse(String[] args) {
            super.parse(args);

            // support the old syntax "hiveserver [port [threads]]" but complain
            args = commandLine.getArgs();
            if (args.length >= 1) {
                // complain about the deprecated syntax -- but still run
                System.err.println("This usage has been deprecated, consider using the new command "
                        + "line syntax (run with -h to see usage information)");

                port = Integer.parseInt(args[0]);
            }
            if (args.length >= 2) {
                minWorkerThreads = Integer.parseInt(args[1]);
            }

            // notice that command line options take precedence over the
            // deprecated (old style) naked args...
            if (commandLine.hasOption('p')) {
                port = Integer.parseInt(commandLine.getOptionValue('p'));
            } else {
                // legacy handling
                String hivePort = System.getenv("HIVE_PORT");
                if (hivePort != null) {
                    port = Integer.parseInt(hivePort);
                }
            }
            if (commandLine.hasOption(OPTION_MIN_WORKER_THREADS)) {
                minWorkerThreads = Integer.parseInt(commandLine.getOptionValue(OPTION_MIN_WORKER_THREADS));
            }
            if (commandLine.hasOption(OPTION_MAX_WORKER_THREADS)) {
                maxWorkerThreads = Integer.parseInt(commandLine.getOptionValue(OPTION_MAX_WORKER_THREADS));
            }
        }
    }
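
    // Example invocations (illustrative; assumes the standard hive launcher
    // script). New-style flags take precedence over the deprecated positional
    // form and over the HIVE_PORT environment variable:
    //
    //   hive --service hiveserver -p 10001 --minWorkerThreads 10
    //   HIVE_PORT=10001 hive --service hiveserver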

    public static void main(String[] args) {
        try {
            HiveServerCli cli = new HiveServerCli();

            cli.parse(args);

            // NOTE: It is critical to do this prior to initializing log4j, otherwise
            // any log specific settings via hiveconf will be ignored
            Properties hiveconf = cli.addHiveconfToSystemProperties();

            // NOTE: It is critical to do this here so that log4j is reinitialized
            // before any of the other core hive classes are loaded
            try {
                LogUtils.initHiveLog4j();
            } catch (LogInitializationException e) {
                HiveServerHandler.LOG.warn(e.getMessage());
            }

            HiveConf conf = new HiveConf(HiveServerHandler.class);
            ServerUtils.cleanUpScratchDir(conf);
            TServerTransport serverTransport = new TServerSocket(cli.port);

            // set all properties specified on the command line
            for (Map.Entry<Object, Object> item : hiveconf.entrySet()) {
                conf.set((String) item.getKey(), (String) item.getValue());
            }

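            // The TProcessor passed to the factory is unused (hence null):
            // getProcessor() builds a fresh handler for every client
            // connection, giving each connection its own session.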
            ThriftHiveProcessorFactory hfactory = new ThriftHiveProcessorFactory(null, conf);

            TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverTransport).processorFactory(hfactory)
                    .transportFactory(new TTransportFactory()).protocolFactory(new TBinaryProtocol.Factory())
                    .minWorkerThreads(cli.minWorkerThreads).maxWorkerThreads(cli.maxWorkerThreads);

            TServer server = new TThreadPoolServer(sargs);

            String msg = "Starting hive server on port " + cli.port + " with " + cli.minWorkerThreads
                    + " min worker threads and " + cli.maxWorkerThreads + " max worker threads";
            HiveServerHandler.LOG.info(msg);
            if (cli.isVerbose()) {
                System.err.println(msg);
            }

            server.serve();
        } catch (Exception x) {
            x.printStackTrace();
        }
    }
}