// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.log4j.Appender;
import org.apache.log4j.FileAppender;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.impala.analysis.ToSqlUtils;
import com.cloudera.impala.authorization.AuthorizationConfig;
import com.cloudera.impala.authorization.ImpalaInternalAdminUser;
import com.cloudera.impala.authorization.User;
import com.cloudera.impala.catalog.DataSource;
import com.cloudera.impala.catalog.Function;
import com.cloudera.impala.catalog.Role;
import com.cloudera.impala.common.FileSystemUtil;
import com.cloudera.impala.common.ImpalaException;
import com.cloudera.impala.common.InternalException;
import com.cloudera.impala.common.JniUtil;
import com.cloudera.impala.thrift.TCatalogObject;
import com.cloudera.impala.thrift.TDescribeTableParams;
import com.cloudera.impala.thrift.TDescribeTableResult;
import com.cloudera.impala.thrift.TExecRequest;
import com.cloudera.impala.thrift.TGetAllHadoopConfigsResponse;
import com.cloudera.impala.thrift.TGetDataSrcsParams;
import com.cloudera.impala.thrift.TGetDataSrcsResult;
import com.cloudera.impala.thrift.TGetDbsParams;
import com.cloudera.impala.thrift.TGetDbsResult;
import com.cloudera.impala.thrift.TGetFunctionsParams;
import com.cloudera.impala.thrift.TGetFunctionsResult;
import com.cloudera.impala.thrift.TGetHadoopConfigRequest;
import com.cloudera.impala.thrift.TGetHadoopConfigResponse;
import com.cloudera.impala.thrift.TGetTablesParams;
import com.cloudera.impala.thrift.TGetTablesResult;
import com.cloudera.impala.thrift.TLoadDataReq;
import com.cloudera.impala.thrift.TLoadDataResp;
import com.cloudera.impala.thrift.TLogLevel;
import com.cloudera.impala.thrift.TMetadataOpRequest;
import com.cloudera.impala.thrift.TQueryCtx;
import com.cloudera.impala.thrift.TResultSet;
import com.cloudera.impala.thrift.TShowGrantRoleParams;
import com.cloudera.impala.thrift.TShowRolesParams;
import com.cloudera.impala.thrift.TShowRolesResult;
import com.cloudera.impala.thrift.TShowStatsParams;
import com.cloudera.impala.thrift.TTableName;
import com.cloudera.impala.thrift.TUpdateCatalogCacheRequest;
import com.cloudera.impala.util.GlogAppender;
import com.cloudera.impala.util.TSessionStateUtil;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
 * JNI-callable interface onto a wrapped Frontend instance. The main point is to
 * serialize and deserialize thrift structures between C++ and Java.
 */
public class JniFrontend {
  private final static Logger LOG = LoggerFactory.getLogger(JniFrontend.class);
  private final static TBinaryProtocol.Factory protocolFactory_ =
      new TBinaryProtocol.Factory();
  private final Frontend frontend_;

  /**
   * Create a new instance of the Jni Frontend.
   */
  public JniFrontend(boolean lazy, String serverName, String authorizationPolicyFile,
      String sentryConfigFile, String authPolicyProviderClass, int impalaLogLevel,
      int otherLogLevel) throws InternalException {
    GlogAppender.Install(TLogLevel.values()[impalaLogLevel],
        TLogLevel.values()[otherLogLevel]);

    // Validate the authorization configuration before initializing the Frontend.
    // If there are any configuration problems Impala startup will fail.
    AuthorizationConfig authConfig = new AuthorizationConfig(serverName,
        authorizationPolicyFile, sentryConfigFile, authPolicyProviderClass);
    authConfig.validateConfig();
    if (authConfig.isEnabled()) {
      LOG.info(String.format("Authorization is 'ENABLED' using %s",
          authConfig.isFileBasedPolicy() ?
          "file-based policy from: " + authConfig.getPolicyFile() :
          "the Sentry Policy Service."));
    } else {
      LOG.info("Authorization is 'DISABLED'.");
    }
    LOG.info(JniUtil.getJavaVersion());

    frontend_ = new Frontend(authConfig);
  }

  /**
   * Jni wrapper for Frontend.createExecRequest(). Accepts a serialized
   * TQueryCtx; returns a serialized TExecRequest.
   */
  public byte[] createExecRequest(byte[] thriftQueryContext) throws ImpalaException {
    TQueryCtx queryCtx = new TQueryCtx();
    JniUtil.deserializeThrift(protocolFactory_, queryCtx, thriftQueryContext);

    StringBuilder explainString = new StringBuilder();
    TExecRequest result = frontend_.createExecRequest(queryCtx, explainString);
    if (explainString.length() > 0) LOG.debug(explainString.toString());

    // TODO: avoid creating serializer for each query?
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }
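  /**
   * Applies a catalog update to the frontend's catalog cache. Accepts a serialized
   * TUpdateCatalogCacheRequest; returns the serialized result of
   * Frontend.updateCatalogCache().
   */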
  public byte[] updateCatalogCache(byte[] thriftCatalogUpdate) throws ImpalaException {
    TUpdateCatalogCacheRequest req = new TUpdateCatalogCacheRequest();
    JniUtil.deserializeThrift(protocolFactory_, req, thriftCatalogUpdate);
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(frontend_.updateCatalogCache(req));
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Loads a table or partition with one or more data files. If the "overwrite" flag
   * in the request is true, all existing data in the table/partition will be replaced.
   * If the "overwrite" flag is false, the files will be added alongside any existing
   * data files.
   */
  public byte[] loadTableData(byte[] thriftLoadTableDataParams)
      throws ImpalaException, IOException {
    TLoadDataReq request = new TLoadDataReq();
    JniUtil.deserializeThrift(protocolFactory_, request, thriftLoadTableDataParams);
    TLoadDataResp response = frontend_.loadTableData(request);
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(response);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Return an explain plan based on thriftQueryContext, a serialized TQueryCtx.
   * This call is thread-safe.
   */
  public String getExplainPlan(byte[] thriftQueryContext) throws ImpalaException {
    TQueryCtx queryCtx = new TQueryCtx();
    JniUtil.deserializeThrift(protocolFactory_, queryCtx, thriftQueryContext);
    String plan = frontend_.getExplainString(queryCtx);
    LOG.debug("Explain plan: " + plan);
    return plan;
  }

  /**
   * Returns a list of table names matching an optional pattern.
   * The argument is a serialized TGetTablesParams object.
   * The return type is a serialized TGetTablesResult object.
   * @see Frontend#getTableNames
   */
  public byte[] getTableNames(byte[] thriftGetTablesParams) throws ImpalaException {
    TGetTablesParams params = new TGetTablesParams();
    JniUtil.deserializeThrift(protocolFactory_, params, thriftGetTablesParams);
    // If the session was not set it indicates this is an internal Impala call.
    User user = params.isSetSession() ?
        new User(TSessionStateUtil.getEffectiveUser(params.getSession())) :
        ImpalaInternalAdminUser.getInstance();
    Preconditions.checkState(!params.isSetSession() || user != null);
    List<String> tables = frontend_.getTableNames(params.db, params.pattern, user);

    TGetTablesResult result = new TGetTablesResult();
    result.setTables(tables);

    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Returns a list of database names matching an optional pattern.
   * The argument is a serialized TGetDbsParams object.
   * The return type is a serialized TGetDbsResult object.
   * @see Frontend#getDbNames
   */
  public byte[] getDbNames(byte[] thriftGetTablesParams) throws ImpalaException {
    TGetDbsParams params = new TGetDbsParams();
    JniUtil.deserializeThrift(protocolFactory_, params, thriftGetTablesParams);
    // If the session was not set it indicates this is an internal Impala call.
    User user = params.isSetSession() ?
        new User(TSessionStateUtil.getEffectiveUser(params.getSession())) :
        ImpalaInternalAdminUser.getInstance();
    List<String> dbs = frontend_.getDbNames(params.pattern, user);

    TGetDbsResult result = new TGetDbsResult();
    result.setDbs(dbs);

    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Returns a list of data sources matching an optional pattern.
   * The argument is a serialized TGetDataSrcsParams object.
   * The return type is a serialized TGetDataSrcsResult object.
   * @see Frontend#getDataSrcs
   */
  public byte[] getDataSrcMetadata(byte[] thriftParams) throws ImpalaException {
    TGetDataSrcsParams params = new TGetDataSrcsParams();
    JniUtil.deserializeThrift(protocolFactory_, params, thriftParams);

    TGetDataSrcsResult result = new TGetDataSrcsResult();
    List<DataSource> dataSources = frontend_.getDataSrcs(params.pattern);
    result.setData_src_names(Lists.<String>newArrayListWithCapacity(dataSources.size()));
    result.setLocations(Lists.<String>newArrayListWithCapacity(dataSources.size()));
    result.setClass_names(Lists.<String>newArrayListWithCapacity(dataSources.size()));
    result.setApi_versions(Lists.<String>newArrayListWithCapacity(dataSources.size()));
    for (DataSource dataSource : dataSources) {
      result.addToData_src_names(dataSource.getName());
      result.addToLocations(dataSource.getLocation());
      result.addToClass_names(dataSource.getClassName());
      result.addToApi_versions(dataSource.getApiVersion());
    }
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }
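  /**
   * Returns either table statistics or column statistics for the table named in the
   * serialized TShowStatsParams argument, depending on its is_show_col_stats flag.
   * The return value is a serialized TResultSet.
   */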
  public byte[] getStats(byte[] thriftShowStatsParams) throws ImpalaException {
    TShowStatsParams params = new TShowStatsParams();
    JniUtil.deserializeThrift(protocolFactory_, params, thriftShowStatsParams);
    Preconditions.checkState(params.isSetTable_name());
    TResultSet result;
    if (params.isIs_show_col_stats()) {
      result = frontend_.getColumnStats(params.getTable_name().getDb_name(),
          params.getTable_name().getTable_name());
    } else {
      result = frontend_.getTableStats(params.getTable_name().getDb_name(),
          params.getTable_name().getTable_name());
    }
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Returns a list of function names matching an optional pattern.
   * The argument is a serialized TGetFunctionsParams object.
   * The return type is a serialized TGetFunctionsResult object.
   * @see Frontend#getFunctions
   */
  public byte[] getFunctions(byte[] thriftGetFunctionsParams) throws ImpalaException {
    TGetFunctionsParams params = new TGetFunctionsParams();
    JniUtil.deserializeThrift(protocolFactory_, params, thriftGetFunctionsParams);

    TGetFunctionsResult result = new TGetFunctionsResult();
    List<String> signatures = Lists.newArrayList();
    List<String> retTypes = Lists.newArrayList();
    List<Function> fns = frontend_.getFunctions(params.category, params.db,
        params.pattern);
    for (Function fn : fns) {
      signatures.add(fn.signatureString());
      retTypes.add(fn.getReturnType().toString());
    }
    result.setFn_signatures(signatures);
    result.setFn_ret_types(retTypes);
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Gets the thrift representation of a catalog object.
   */
  public byte[] getCatalogObject(byte[] thriftParams)
      throws ImpalaException, TException {
    TCatalogObject objectDescription = new TCatalogObject();
    JniUtil.deserializeThrift(protocolFactory_, objectDescription, thriftParams);
    TSerializer serializer = new TSerializer(protocolFactory_);
    return serializer.serialize(
        frontend_.getCatalog().getTCatalogObject(objectDescription));
  }

  /**
   * Returns a list of the columns making up a table.
   * The argument is a serialized TDescribeTableParams object.
   * The return type is a serialized TDescribeTableResult object.
   * @see Frontend#describeTable
   */
  public byte[] describeTable(byte[] thriftDescribeTableParams) throws ImpalaException {
    TDescribeTableParams params = new TDescribeTableParams();
    JniUtil.deserializeThrift(protocolFactory_, params, thriftDescribeTableParams);
    TDescribeTableResult result = frontend_.describeTable(params.getDb(),
        params.getTable_name(), params.getOutput_style());

    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Returns a SQL DDL string for creating the specified table.
   */
  public String showCreateTable(byte[] thriftTableName) throws ImpalaException {
    TTableName params = new TTableName();
    JniUtil.deserializeThrift(protocolFactory_, params, thriftTableName);
    return ToSqlUtils.getCreateTableSql(frontend_.getCatalog().getTable(
        params.getDb_name(), params.getTable_name()));
  }

  /**
   * Gets roles according to the request: the requesting user's current roles, the
   * roles granted to a specific group, or all roles.
   */
  public byte[] getRoles(byte[] showRolesParams) throws ImpalaException {
    TShowRolesParams params = new TShowRolesParams();
    JniUtil.deserializeThrift(protocolFactory_, params, showRolesParams);
    TShowRolesResult result = new TShowRolesResult();
    List<Role> roles = Lists.newArrayList();
    if (params.isIs_show_current_roles() || params.isSetGrant_group()) {
      User user = new User(params.getRequesting_user());
      Set<String> groupNames;
      if (params.isIs_show_current_roles()) {
        groupNames = frontend_.getAuthzChecker().getUserGroups(user);
      } else {
        Preconditions.checkState(params.isSetGrant_group());
        groupNames = Sets.newHashSet(params.getGrant_group());
      }
      for (String groupName : groupNames) {
        roles.addAll(frontend_.getCatalog().getAuthPolicy().getGrantedRoles(groupName));
      }
    } else {
      Preconditions.checkState(!params.isIs_show_current_roles());
      roles = frontend_.getCatalog().getAuthPolicy().getAllRoles();
    }

    result.setRole_names(Lists.<String>newArrayListWithExpectedSize(roles.size()));
    for (Role role : roles) {
      result.getRole_names().add(role.getName());
    }

    Collections.sort(result.getRole_names());

    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }
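  /**
   * Returns the privileges granted to the role named in the serialized
   * TShowGrantRoleParams argument, as a serialized TResultSet.
   */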
  public byte[] getRolePrivileges(byte[] showGrantRolesParams) throws ImpalaException {
    TShowGrantRoleParams params = new TShowGrantRoleParams();
    JniUtil.deserializeThrift(protocolFactory_, params, showGrantRolesParams);
    TResultSet result = frontend_.getCatalog().getAuthPolicy().getRolePrivileges(
        params.getRole_name(), params.getPrivilege());
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Executes a HiveServer2 metadata operation and returns a TResultSet.
   */
  public byte[] execHiveServer2MetadataOp(byte[] metadataOpsParams)
      throws ImpalaException {
    TMetadataOpRequest params = new TMetadataOpRequest();
    JniUtil.deserializeThrift(protocolFactory_, params, metadataOpsParams);
    TResultSet result = frontend_.execHiveServer2MetadataOp(params);

    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }
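  /**
   * Marks the frontend's catalog as ready to serve requests.
   */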
  public void setCatalogInitialized() {
    frontend_.getCatalog().setIsReady();
  }

  // Caching this saves ~50ms per call to getAllHadoopConfigs
  private static final Configuration CONF = new Configuration();

  /**
   * Returns all loaded Hadoop configuration parameters as a map of keys and values
   * inside a serialized TGetAllHadoopConfigsResponse.
   */
  public byte[] getAllHadoopConfigs() throws ImpalaException {
    Map<String, String> configs = Maps.newHashMap();
    for (Map.Entry<String, String> e : CONF) {
      configs.put(e.getKey(), e.getValue());
    }
    TGetAllHadoopConfigsResponse result = new TGetAllHadoopConfigsResponse();
    result.setConfigs(configs);
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }

  /**
   * Returns the corresponding config value for the given key as a serialized
   * TGetHadoopConfigResponse. If the config value is null, the 'value' field in the
   * thrift response object will not be set.
   */
  public byte[] getHadoopConfig(byte[] serializedRequest) throws ImpalaException {
    TGetHadoopConfigRequest request = new TGetHadoopConfigRequest();
    JniUtil.deserializeThrift(protocolFactory_, request, serializedRequest);
    TGetHadoopConfigResponse result = new TGetHadoopConfigResponse();
    result.setValue(CONF.get(request.getName()));
    TSerializer serializer = new TSerializer(protocolFactory_);
    try {
      return serializer.serialize(result);
    } catch (TException e) {
      throw new InternalException(e.getMessage());
    }
  }
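  /**
   * Simple "major.minor" version holder used to compare CDH versions, e.g.
   * new CdhVersion("4.2").compareTo(new CdhVersion("4.1")) returns a positive value.
   */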
  public class CdhVersion implements Comparable<CdhVersion> {
    private final int major;
    private final int minor;

    public CdhVersion(String versionString) throws IllegalArgumentException {
      String[] version = versionString.split("\\.");
      if (version.length != 2) {
        throw new IllegalArgumentException("Invalid version string:" + versionString);
      }
      try {
        major = Integer.parseInt(version[0]);
        minor = Integer.parseInt(version[1]);
      } catch (NumberFormatException e) {
        throw new IllegalArgumentException("Invalid version string:" + versionString);
      }
    }

    public int compareTo(CdhVersion o) {
      return (this.major == o.major) ? (this.minor - o.minor) : (this.major - o.major);
    }

    @Override
    public String toString() {
      return major + "." + minor;
    }
  }

  /**
   * Returns an error string describing all configuration issues. If no config issues
   * are found, returns an empty string.
   * Short circuit read checks and block location tracking checks are run only if Impala
   * can determine that it is running on CDH.
   */
  public String checkConfiguration() {
    CdhVersion guessedCdhVersion = guessCdhVersionFromNnWebUi();
    CdhVersion cdh41 = new CdhVersion("4.1");
    CdhVersion cdh42 = new CdhVersion("4.2");
    StringBuilder output = new StringBuilder();

    output.append(checkLogFilePermission());
    output.append(checkFileSystem(CONF));

    if (guessedCdhVersion == null) {
      // Do not run any additional checks because we cannot determine the CDH version
      LOG.warn("Cannot detect CDH version. Skipping Hadoop configuration checks");
      return output.toString();
    }

    if (guessedCdhVersion.compareTo(cdh41) == 0) {
      output.append(checkShortCircuitReadCdh41(CONF));
    } else if (guessedCdhVersion.compareTo(cdh42) >= 0) {
      output.append(checkShortCircuitRead(CONF));
    } else {
      output.append(guessedCdhVersion)
          .append(" is detected but Impala requires CDH 4.1 or above.");
    }
    output.append(checkBlockLocationTracking(CONF));

    return output.toString();
  }

  /**
   * Returns an empty string if Impala has permission to write to FE log files. If not,
   * returns an error string describing the issues.
   */
  private String checkLogFilePermission() {
    org.apache.log4j.Logger l4jRootLogger = org.apache.log4j.Logger.getRootLogger();
    Enumeration appenders = l4jRootLogger.getAllAppenders();
    while (appenders.hasMoreElements()) {
      Appender appender = (Appender) appenders.nextElement();
      if (appender instanceof FileAppender) {
        if (((FileAppender) appender).getFile() == null) {
          // If Impala does not have permission to write to the log file, the
          // FileAppender will fail to initialize and logFile will be null.
          // Unfortunately, we can't get the log file name here.
          return "Impala does not have permission to write to the log file specified " +
              "in log4j.properties.";
        }
      }
    }
    return "";
  }

  /**
   * Guess the CDH version by looking at the version info string from the Namenode
   * web UI. Return the CDH version or null (if we can't determine the version).
   */
  private CdhVersion guessCdhVersionFromNnWebUi() {
    try {
      // On a large cluster, avoid hitting the name node at the same time
      Random randomGenerator = new Random();
      Thread.sleep(randomGenerator.nextInt(2000));
    } catch (Exception e) {
      // The sleep is best-effort jitter; ignore interruptions and continue.
    }

    try {
      String nnUrl = getCurrentNameNodeAddress();
      if (nnUrl == null) {
        return null;
      }
      URL nnWebUi = new URL("http://" + nnUrl + "/dfshealth.jsp");
      URLConnection conn = nnWebUi.openConnection();
      BufferedReader in = new BufferedReader(
          new InputStreamReader(conn.getInputStream()));
      String inputLine;
      while ((inputLine = in.readLine()) != null) {
        if (inputLine.contains("Version:")) {
          // Parse the version string cdh<major>.<minor>
          Pattern cdhVersionPattern = Pattern.compile("cdh\\d\\.\\d");
          Matcher versionMatcher = cdhVersionPattern.matcher(inputLine);
          if (versionMatcher.find()) {
            // Strip out "cdh" before passing to CdhVersion
            return new CdhVersion(versionMatcher.group().substring(3));
          }
          return null;
        }
      }
    } catch (Exception e) {
      LOG.info(e.toString());
    }
    return null;
  }

  /**
   * Derive the namenode http address from the current file system,
   * either default or as set by "-fs" in the generic options.
   *
   * @return Returns http address or null if failure.
   */
  private String getCurrentNameNodeAddress() throws Exception {
    // get the filesystem object to verify it is an HDFS system
    FileSystem fs;
    fs = FileSystem.get(CONF);
    if (!(fs instanceof DistributedFileSystem)) {
      LOG.error("FileSystem is " + fs.getUri());
      return null;
    }
    return DFSUtil.getInfoServer(HAUtil.getAddressOfActive(fs), CONF, false);
  }
  /**
   * Return an empty string if short circuit read is properly enabled. If not, return an
   * error string describing the issues.
   */
  private String checkShortCircuitRead(Configuration conf) {
    StringBuilder output = new StringBuilder();
    String errorMessage = "ERROR: short-circuit local reads is disabled because\n";
    String prefix = " - ";
    StringBuilder errorCause = new StringBuilder();

    // dfs.domain.socket.path must be set properly
    String domainSocketPath = conf.getTrimmed(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
        DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT);
    if (domainSocketPath.isEmpty()) {
      errorCause.append(prefix);
      errorCause.append(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
      errorCause.append(" is not configured.\n");
    } else {
      // The socket path parent directory must be readable and executable.
      File socketFile = new File(domainSocketPath);
      File socketDir = socketFile.getParentFile();
      if (socketDir == null || !socketDir.canRead() || !socketDir.canExecute()) {
        errorCause.append(prefix);
        errorCause.append("Impala cannot read or execute the parent directory of ");
        errorCause.append(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
        errorCause.append("\n");
      }
    }

    // dfs.client.read.shortcircuit must be set to true.
    if (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
        DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) {
      errorCause.append(prefix);
      errorCause.append(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY);
      errorCause.append(" is not enabled.\n");
    }

    // dfs.client.use.legacy.blockreader.local must be set to false
    if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
        DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT)) {
      errorCause.append(prefix);
      errorCause.append(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL);
      errorCause.append(" should not be enabled.\n");
    }

    if (errorCause.length() > 0) {
      output.append(errorMessage);
      output.append(errorCause);
    }

    return output.toString();
  }

  /**
   * Check short circuit read for CDH 4.1.
   * Return an empty string if short circuit read is properly enabled. If not, return an
   * error string describing the issues.
   */
  private String checkShortCircuitReadCdh41(Configuration conf) {
    StringBuilder output = new StringBuilder();
    String errorMessage = "ERROR: short-circuit local reads is disabled because\n";
    String prefix = " - ";
    StringBuilder errorCause = new StringBuilder();

    // Client side checks
    // dfs.client.read.shortcircuit must be set to true.
    if (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
        DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) {
      errorCause.append(prefix);
      errorCause.append(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY);
      errorCause.append(" is not enabled.\n");
    }

    // dfs.client.use.legacy.blockreader.local must be set to true
    if (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
        DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT)) {
      errorCause.append(prefix);
      errorCause.append(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL);
      errorCause.append(" is not enabled.\n");
    }

    // Server side checks
    // Check data node server side configuration by reading the CONF from the data node
    // web UI
    // TODO: disabled for now
    //cdh41ShortCircuitReadDatanodeCheck(errorCause, prefix);

    if (errorCause.length() > 0) {
      output.append(errorMessage);
      output.append(errorCause);
    }

    return output.toString();
  }

  /**
   * Checks the data node's server side configuration by reading the CONF from the data
   * node. This appends error messages to errorCause prefixed by prefix if data node
   * configuration is not properly set.
   */
  private void cdh41ShortCircuitReadDatanodeCheck(StringBuilder errorCause,
      String prefix) {
    String dnWebUiAddr = CONF.get(DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY,
        DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_DEFAULT);
    URL dnWebUiUrl = null;
    try {
      dnWebUiUrl = new URL("http://" + dnWebUiAddr + "/conf");
    } catch (Exception e) {
      LOG.info(e.toString());
    }
    Configuration dnConf = new Configuration(false);
    dnConf.addResource(dnWebUiUrl);

    // dfs.datanode.data.dir.perm should be at least 750
    int permissionInt = 0;
    try {
      String permission = dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
          DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT);
      permissionInt = Integer.parseInt(permission);
    } catch (Exception e) {
      // Fall through with permissionInt == 0 so the check below reports the error.
    }
    if (permissionInt < 750) {
      errorCause.append(prefix);
      errorCause.append("Data node configuration ");
      errorCause.append(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY);
      errorCause.append(" is not properly set. It should be set to 750.\n");
    }

    // dfs.block.local-path-access.user should contain the user account impala is
    // running under
    String accessUser = dnConf.get(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY);
    if (accessUser == null || !accessUser.contains(System.getProperty("user.name"))) {
      errorCause.append(prefix);
      errorCause.append("Data node configuration ");
      errorCause.append(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY);
      errorCause.append(" is not properly set. It should contain ");
      errorCause.append(System.getProperty("user.name"));
      errorCause.append("\n");
    }
  }

  /**
   * Return an empty string if block location tracking is properly enabled. If not,
   * return an error string describing the issues.
   */
  private String checkBlockLocationTracking(Configuration conf) {
    StringBuilder output = new StringBuilder();
    String errorMessage = "ERROR: block location tracking is not properly enabled " +
        "because\n";
    String prefix = " - ";
    StringBuilder errorCause = new StringBuilder();
    if (!conf.getBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
        DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT)) {
      errorCause.append(prefix);
      errorCause.append(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED);
      errorCause.append(" is not enabled.\n");
    }

    // dfs.client.file-block-storage-locations.timeout should be >= 500
    // TODO: OPSAPS-12765 - it should be >= 3000, but use 500 for now until CM refresh
    if (conf.getInt(DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT,
        DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT) < 500) {
      errorCause.append(prefix);
      errorCause.append(DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT);
      errorCause.append(" is too low. It should be at least 3000.\n");
    }

    if (errorCause.length() > 0) {
      output.append(errorMessage);
      output.append(errorCause);
    }

    return output.toString();
  }
  /**
   * Return an empty string if the FileSystem configured in CONF refers to a
   * DistributedFileSystem (the only one supported by Impala) and Impala can list the
   * root directory "/". Otherwise, return an error string describing the issues.
   */
  private String checkFileSystem(Configuration conf) {
    try {
      FileSystem fs = FileSystem.get(CONF);
      if (!(fs instanceof DistributedFileSystem)) {
        return "Unsupported file system. Impala only supports DistributedFileSystem " +
            "but the configured filesystem is: " + fs.getClass().getSimpleName() +
            ". " + CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY +
            " (" + CONF.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY) + ")" +
            " might be set incorrectly";
      }
    } catch (IOException e) {
      return "couldn't retrieve FileSystem:\n" + e.getMessage();
    }

    try {
      FileSystemUtil.getTotalNumVisibleFiles(new Path("/"));
    } catch (IOException e) {
      return "Could not read the HDFS root directory at " +
          CONF.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY) +
          ". Error was: \n" + e.getMessage();
    }
    return "";
  }
}