Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.metastore.utils;

import java.beans.PropertyDescriptor;
import java.io.File;
import java.net.URL;
import java.net.URLClassLoader;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TimeZone;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;

import static java.util.regex.Pattern.compile;

import javax.annotation.Nullable;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.ColumnType;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.WMPoolSchedulingPolicy;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge;
import org.apache.hadoop.security.SaslRpcServer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Joiner;

public class MetaStoreUtils {

  /** A fixed date format to be used for hive partition column values. */
  public static final ThreadLocal<DateFormat> PARTITION_DATE_FORMAT =
      new ThreadLocal<DateFormat>() {
        @Override
        protected DateFormat initialValue() {
          DateFormat val = new SimpleDateFormat("yyyy-MM-dd");
          val.setLenient(false); // Without this, 2020-20-20 becomes 2021-08-20.
          val.setTimeZone(TimeZone.getTimeZone("UTC"));
          return val;
        }
      };

  // Indicates a type was derived from the deserializer rather than Hive's metadata.
  public static final String TYPE_FROM_DESERIALIZER = "<derived from deserializer>";

  private static final Logger LOG = LoggerFactory.getLogger(MetaStoreUtils.class);
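  // --- Illustrative usage sketch (not part of the upstream class) ---
  // PARTITION_DATE_FORMAT is strict (non-lenient) and UTC-based, so impossible
  // dates are rejected instead of being rolled over. The helper method below is
  // hypothetical and exists only to demonstrate the expected behaviour.
  private static boolean isParsablePartitionDate(String value) {
    try {
      PARTITION_DATE_FORMAT.get().parse(value); // "2020-02-29" parses, "2020-20-20" throws
      return true;
    } catch (java.text.ParseException e) {
      return false;
    }
  }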
  // The following two are public for any external users who wish to use them.
  /**
   * This character is used to mark a database name as having a catalog name prepended. This
   * marker should be placed first in the String to make it easy to determine that this has both
   * a catalog and a database name. @ is chosen as it is not used in regular expressions. This
   * is only intended for use when making old Thrift calls that do not support catalog names.
   */
  public static final char CATALOG_DB_THRIFT_NAME_MARKER = '@';

  /**
   * This String is used to separate the catalog name from the database name. This should only
   * be used in Strings that are prepended with {@link #CATALOG_DB_THRIFT_NAME_MARKER}. # is
   * chosen because it is not used in regular expressions. This is only intended for use when
   * making old Thrift calls that do not support catalog names.
   */
  public static final String CATALOG_DB_SEPARATOR = "#";

  /**
   * Mark a database as being empty (as distinct from null).
   */
  public static final String DB_EMPTY_MARKER = "!";

  public static final String EXTERNAL_TABLE_PURGE = "external.table.purge";

  // Right now we only support one special character, '/'.
  // More special characters can be added accordingly in the future.
  // NOTE:
  // If the following array is updated, please also be sure to update the
  // documentation of the configuration parameter
  // HIVE_SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES in HiveConf as well.
  private static final char[] specialCharactersInTableNames = new char[] { '/' };

  /**
   * Catches exceptions that can't be handled and wraps them in a MetaException.
   *
   * @param e exception to wrap.
   * @throws MetaException wrapper for the exception
   */
  public static void logAndThrowMetaException(Exception e) throws MetaException {
    String exInfo = "Got exception: " + e.getClass().getName() + " " + e.getMessage();
    LOG.error(exInfo, e);
    LOG.error("Converting exception to MetaException");
    throw new MetaException(exInfo);
  }

  public static String encodeTableName(String name) {
    // The encoding method is simple: replace each special character with its
    // corresponding ASCII code. Note that unicode is not supported in table names,
    // and we have explicit checks for it.
    StringBuilder sb = new StringBuilder();
    for (char ch : name.toCharArray()) {
      if (Character.isLetterOrDigit(ch) || ch == '_') {
        sb.append(ch);
      } else {
        sb.append('-').append((int) ch).append('-');
      }
    }
    return sb.toString();
  }

  /**
   * Convert an Exception to a MetaException, setting the cause to the given exception.
   * @param e cause of the exception
   * @return the MetaException with the specified exception as the cause
   */
  public static MetaException newMetaException(Exception e) {
    return newMetaException(e != null ? e.getMessage() : null, e);
  }

  /**
   * Convert an Exception to a MetaException, setting the cause to the given exception.
   * @param errorMessage the error message for this MetaException
   * @param e cause of the exception
   * @return the MetaException with the specified exception as the cause
   */
  public static MetaException newMetaException(String errorMessage, Exception e) {
    MetaException metaException = new MetaException(errorMessage);
    if (e != null) {
      metaException.initCause(e);
    }
    return metaException;
  }

  public static List<String> getColumnNamesForTable(Table table) {
    List<String> colNames = new ArrayList<>();
    Iterator<FieldSchema> colsIterator = table.getSd().getColsIterator();
    while (colsIterator.hasNext()) {
      colNames.add(colsIterator.next().getName());
    }
    return colNames;
  }
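  // --- Illustrative usage sketch (not part of the upstream class) ---
  // encodeTableName replaces every character that is not a letter, digit or '_'
  // with its ASCII code wrapped in dashes. The method below is hypothetical.
  private static String exampleEncodeTableName() {
    return encodeTableName("sales/2020"); // '/' is ASCII 47, so this yields "sales-47-2020"
  }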
  /**
   * validateName
   *
   * Checks that the name conforms to our standard, "[a-zA-Z_0-9]+", i.e. that it
   * consists only of letters, digits and underscores (plus any explicitly allowed
   * special characters).
   *
   * @param name
   *          the name to validate
   * @param conf
   *          hive configuration
   * @return true if the name conforms, false if it doesn't match the pattern.
   */
  public static boolean validateName(String name, Configuration conf) {
    Pattern tpat;
    String allowedCharacters = "\\w_";
    if (conf != null
        && MetastoreConf.getBoolVar(conf,
            MetastoreConf.ConfVars.SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES)) {
      for (Character c : specialCharactersInTableNames) {
        allowedCharacters += c;
      }
    }
    tpat = Pattern.compile("[" + allowedCharacters + "]+");
    Matcher m = tpat.matcher(name);
    return m.matches();
  }

  /**
   * Determines whether a table is an external table.
   *
   * @param table table of interest
   *
   * @return true if external
   */
  public static boolean isExternalTable(Table table) {
    if (table == null) {
      return false;
    }
    Map<String, String> params = table.getParameters();
    if (params == null) {
      return false;
    }
    return isExternal(params);
  }

  /**
   * Determines whether a table needs to be purged or not.
   *
   * @param table table of interest
   *
   * @return true if the external table needs to be purged
   */
  public static boolean isExternalTablePurge(Table table) {
    if (table == null) {
      return false;
    }
    Map<String, String> params = table.getParameters();
    if (params == null) {
      return false;
    }
    return isPropertyTrue(params, EXTERNAL_TABLE_PURGE);
  }

  public static boolean isExternal(Map<String, String> tableParams) {
    return isPropertyTrue(tableParams, "EXTERNAL");
  }

  public static boolean isPropertyTrue(Map<String, String> tableParams, String prop) {
    return "TRUE".equalsIgnoreCase(tableParams.get(prop));
  }

  /** Duplicates AcidUtils; used in a couple places in metastore. */
  public static boolean isInsertOnlyTableParam(Map<String, String> params) {
    String transactionalProp = params.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
    return (transactionalProp != null && "insert_only".equalsIgnoreCase(transactionalProp));
  }

  public static boolean isNonNativeTable(Table table) {
    if (table == null || table.getParameters() == null) {
      return false;
    }
    return (table.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE) != null);
  }

  /**
   * Given a list of partition columns and a partial mapping from some partition
   * columns to values, returns the values for each partition column.
   * @param partCols the list of table partition columns
   * @param partSpec the partial mapping from partition column to values
   * @return list of values for the given partition columns; any missing value
   *         in partSpec is replaced by an empty string
   */
  public static List<String> getPvals(List<FieldSchema> partCols, Map<String, String> partSpec) {
    List<String> pvals = new ArrayList<>(partCols.size());
    for (FieldSchema field : partCols) {
      String val = StringUtils.defaultString(partSpec.get(field.getName()));
      pvals.add(val);
    }
    return pvals;
  }
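  // --- Illustrative usage sketch (not part of the upstream class) ---
  // getPvals preserves partition-column order and fills values missing from the
  // partial spec with empty strings. The method below is hypothetical.
  private static List<String> examplePvals() {
    List<FieldSchema> partCols = Arrays.asList(
        new FieldSchema("ds", "string", null),
        new FieldSchema("hr", "string", null));
    Map<String, String> partialSpec = new java.util.HashMap<>();
    partialSpec.put("ds", "2020-01-01");
    return getPvals(partCols, partialSpec); // ["2020-01-01", ""]
  }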
  public static String makePartNameMatcher(Table table, List<String> partVals, String defaultStr)
      throws MetaException {
    List<FieldSchema> partCols = table.getPartitionKeys();
    int numPartKeys = partCols.size();
    if (partVals.size() > numPartKeys) {
      throw new MetaException("Incorrect number of partition values."
          + " numPartKeys=" + numPartKeys + ", part_val=" + partVals);
    }
    partCols = partCols.subList(0, partVals.size());
    // Construct a pattern of the form: partKey=partVal/partKey2=partVal2/...
    // where partVal is either the escaped partition value given as input,
    // or a regex of the form ".*"
    // This works because the "=" and "/" separating key names and partition key/values
    // are not escaped.
    String partNameMatcher = Warehouse.makePartName(partCols, partVals, defaultStr);
    // Add ".*" to the regex to match anything else after the partial spec.
    if (partVals.size() < numPartKeys) {
      partNameMatcher += defaultStr;
    }
    return partNameMatcher;
  }

  /**
   * Compare two schemas, ignoring the comment on each field.
   * @param schema1 the first schema to be compared
   * @param schema2 the second schema to be compared
   * @return true if the two schemas are the same, false otherwise
   */
  public static boolean compareFieldColumns(List<FieldSchema> schema1, List<FieldSchema> schema2) {
    if (schema1.size() != schema2.size()) {
      return false;
    }
    Iterator<FieldSchema> its1 = schema1.iterator();
    Iterator<FieldSchema> its2 = schema2.iterator();
    while (its1.hasNext()) {
      FieldSchema f1 = its1.next();
      FieldSchema f2 = its2.next();
      // The default equals provided by thrift compares the comments too for
      // equality, thus we need to compare the relevant fields here.
      if (!StringUtils.equals(f1.getName(), f2.getName())
          || !StringUtils.equals(f1.getType(), f2.getType())) {
        return false;
      }
    }
    return true;
  }

  public static boolean isArchived(Partition part) {
    Map<String, String> params = part.getParameters();
    return (params != null
        && "TRUE".equalsIgnoreCase(params.get(hive_metastoreConstants.IS_ARCHIVED)));
  }

  public static Path getOriginalLocation(Partition part) {
    Map<String, String> params = part.getParameters();
    assert (isArchived(part));
    String originalLocation = params.get(hive_metastoreConstants.ORIGINAL_LOCATION);
    assert (originalLocation != null);
    return new Path(originalLocation);
  }

  private static String ARCHIVING_LEVEL = "archiving_level";

  public static int getArchivingLevel(Partition part) throws MetaException {
    if (!isArchived(part)) {
      throw new MetaException("Getting level of unarchived partition");
    }
    String lv = part.getParameters().get(ARCHIVING_LEVEL);
    if (lv != null) {
      return Integer.parseInt(lv);
    }
    // Partitions archived before multiple archiving was introduced.
    return part.getValues().size();
  }

  /**
   * Read and return the metastore SASL configuration. Currently it uses the default
   * Hadoop SASL configuration and can be configured using "hadoop.rpc.protection".
   * HADOOP-10211 made a backward-incompatible change, due to which this call doesn't
   * work with Hadoop 2.4.0 and later.
   * @param conf the configuration to read from
   * @return the SASL configuration
   */
  public static Map<String, String> getMetaStoreSaslProperties(Configuration conf, boolean useSSL) {
    // As of now the Hive metastore uses the same configuration as the Hadoop SASL configuration.
    // If SSL is enabled, override the given value of "hadoop.rpc.protection" and set it to
    // "authentication". This disables any encryption provided by SASL, since SSL already
    // provides it.
    String hadoopRpcProtectionVal = conf.get(CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION);
    String hadoopRpcProtectionAuth = SaslRpcServer.QualityOfProtection.AUTHENTICATION.toString();
    if (useSSL && hadoopRpcProtectionVal != null
        && !hadoopRpcProtectionVal.equals(hadoopRpcProtectionAuth)) {
      LOG.warn("Overriding value of " + CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION
          + " setting it from " + hadoopRpcProtectionVal + " to " + hadoopRpcProtectionAuth
          + " because SSL is enabled");
      conf.set(CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION, hadoopRpcProtectionAuth);
    }
    return HadoopThriftAuthBridge.getBridge().getHadoopSaslProperties(conf);
  }
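  // --- Illustrative usage sketch (not part of the upstream class) ---
  // With a partial list of partition values, makePartNameMatcher appends
  // defaultStr (typically ".*") so the matcher also covers the unspecified
  // trailing keys. The method below is hypothetical and assumes the table is
  // partitioned by (ds string, hr string).
  private static String examplePartNameMatcher(Table table) throws MetaException {
    return makePartNameMatcher(table, Arrays.asList("2020-01-01"), ".*");
    // -> "ds=2020-01-01.*"
  }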
  /**
   * Add new elements to the classpath.
   *
   * @param newPaths
   *          Array of classpath elements
   */
  public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths) throws Exception {
    URLClassLoader loader = (URLClassLoader) cloader;
    List<URL> curPath = Arrays.asList(loader.getURLs());
    ArrayList<URL> newPath = new ArrayList<>(curPath.size());

    // Get a list with the current classpath components.
    for (URL onePath : curPath) {
      newPath.add(onePath);
    }
    curPath = newPath;

    for (String onestr : newPaths) {
      URL oneurl = urlFromPathString(onestr);
      if (oneurl != null && !curPath.contains(oneurl)) {
        curPath.add(oneurl);
      }
    }

    return new URLClassLoader(curPath.toArray(new URL[0]), loader);
  }

  /**
   * Create a URL from a string representing a path to a local file.
   * The path string can be just a path, or can start with file:/ or file:///.
   * @param onestr path string
   * @return the URL, or null if the string could not be converted
   */
  private static URL urlFromPathString(String onestr) {
    URL oneurl = null;
    try {
      if (onestr.startsWith("file:/")) {
        oneurl = new URL(onestr);
      } else {
        oneurl = new File(onestr).toURL();
      }
    } catch (Exception err) {
      LOG.error("Bad URL " + onestr + ", ignoring path");
    }
    return oneurl;
  }

  /**
   * Convert FieldSchemas to Thrift DDL.
   */
  public static String getDDLFromFieldSchema(String structName, List<FieldSchema> fieldSchemas) {
    StringBuilder ddl = new StringBuilder();
    ddl.append("struct ");
    ddl.append(structName);
    ddl.append(" { ");
    boolean first = true;
    for (FieldSchema col : fieldSchemas) {
      if (first) {
        first = false;
      } else {
        ddl.append(", ");
      }
      ddl.append(ColumnType.typeToThriftType(col.getType()));
      ddl.append(' ');
      ddl.append(col.getName());
    }
    ddl.append("}");

    LOG.trace("DDL: {}", ddl);
    return ddl.toString();
  }

  public static Properties getTableMetadata(org.apache.hadoop.hive.metastore.api.Table table) {
    return MetaStoreUtils.getSchema(table.getSd(), table.getSd(), table.getParameters(),
        table.getDbName(), table.getTableName(), table.getPartitionKeys());
  }

  public static Properties getPartitionMetadata(org.apache.hadoop.hive.metastore.api.Partition partition,
      org.apache.hadoop.hive.metastore.api.Table table) {
    return MetaStoreUtils.getSchema(partition.getSd(), partition.getSd(), partition.getParameters(),
        table.getDbName(), table.getTableName(), table.getPartitionKeys());
  }

  public static Properties getSchema(org.apache.hadoop.hive.metastore.api.Partition part,
      org.apache.hadoop.hive.metastore.api.Table table) {
    return MetaStoreUtils.getSchema(part.getSd(), table.getSd(), table.getParameters(),
        table.getDbName(), table.getTableName(), table.getPartitionKeys());
  }
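  // --- Illustrative usage sketch (not part of the upstream class) ---
  // getDDLFromFieldSchema renders a column list as a Thrift struct declaration.
  // The method below is hypothetical.
  private static String exampleThriftDdl() {
    List<FieldSchema> cols = Arrays.asList(
        new FieldSchema("id", "bigint", null),
        new FieldSchema("name", "string", null));
    return getDDLFromFieldSchema("page_view", cols);
    // -> something like "struct page_view { i64 id, string name}"
  }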
  /**
   * Get a partition level schema from the table level schema.
   * This function will use the same column names, column types and partition keys for
   * each partition Properties. Their values are copied from the table Properties. This
   * is mainly to save CPU and memory. CPU is saved because the first time the
   * StorageDescriptor column names are accessed, JDO needs to execute a SQL query to
   * retrieve the data. If we know the data will be the same as the table level schema
   * and they are immutable, we should just reuse the table level schema objects.
   *
   * @param sd The Partition level Storage Descriptor.
   * @param parameters partition level parameters
   * @param tblSchema The table level schema from which this partition should be copied.
   * @return the properties
   */
  public static Properties getPartSchemaFromTableSchema(StorageDescriptor sd,
      Map<String, String> parameters, Properties tblSchema) {

    // Inherit most properties from the table level schema and overwrite some of them below.
    // This is mainly for saving CPU and memory by reusing the column names, types and
    // partition columns in the table level schema.
    Properties schema = (Properties) tblSchema.clone();

    // InputFormat
    String inputFormat = sd.getInputFormat();
    if (inputFormat == null || inputFormat.length() == 0) {
      String tblInput = schema
          .getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT);
      if (tblInput == null) {
        inputFormat = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName();
      } else {
        inputFormat = tblInput;
      }
    }
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT,
        inputFormat);

    // OutputFormat
    String outputFormat = sd.getOutputFormat();
    if (outputFormat == null || outputFormat.length() == 0) {
      String tblOutput = schema
          .getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_OUTPUT_FORMAT);
      if (tblOutput == null) {
        outputFormat = org.apache.hadoop.mapred.SequenceFileOutputFormat.class.getName();
      } else {
        outputFormat = tblOutput;
      }
    }
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_OUTPUT_FORMAT,
        outputFormat);

    // Location
    if (sd.getLocation() != null) {
      schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION,
          sd.getLocation());
    }

    // Bucket count
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT,
        Integer.toString(sd.getNumBuckets()));

    if (sd.getBucketCols() != null && sd.getBucketCols().size() > 0) {
      schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_FIELD_NAME,
          Joiner.on(",").join(sd.getBucketCols()));
    }

    // SerdeInfo
    if (sd.getSerdeInfo() != null) {

      // We should not update the following 3 values if SerDeInfo contains these.
      // This is to keep backward compatible with getSchema(), where these 3 keys
      // are updated after SerDeInfo properties got copied.
      String cols = org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS;
      String colTypes = org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES;
      String parts = org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS;

      for (Map.Entry<String, String> param : sd.getSerdeInfo().getParameters().entrySet()) {
        String key = param.getKey();
        if (schema.get(key) != null
            && (key.equals(cols) || key.equals(colTypes) || key.equals(parts)
                // Skip Druid and JDBC properties which are used in the respective SerDes,
                // since they are also updated after SerDeInfo properties are copied.
                || key.startsWith(
                    org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DRUID_CONFIG_PREFIX)
                || key.startsWith(
                    org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.JDBC_CONFIG_PREFIX))) {
          continue;
        }
        schema.put(key, (param.getValue() != null) ? param.getValue() : StringUtils.EMPTY);
      }

      if (sd.getSerdeInfo().getSerializationLib() != null) {
        schema.setProperty(ColumnType.SERIALIZATION_LIB, sd.getSerdeInfo().getSerializationLib());
      }
    }

    // Skip columns, since partition level field schemas are the same as the table level's.
    // Skip partition keys, since they are the same as the table level partition keys.

    if (parameters != null) {
      for (Map.Entry<String, String> e : parameters.entrySet()) {
        schema.setProperty(e.getKey(), e.getValue());
      }
    }

    return schema;
  }
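  // --- Illustrative usage sketch (not part of the upstream class) ---
  // A partition whose StorageDescriptor only overrides the location inherits
  // formats, columns and types from the table level Properties. The method
  // below is hypothetical.
  private static Properties examplePartitionSchema(Properties tblSchema) {
    StorageDescriptor sd = new StorageDescriptor();
    sd.setNumBuckets(-1);
    sd.setLocation("/warehouse/page_view/ds=2020-01-01");
    return getPartSchemaFromTableSchema(sd, new java.util.HashMap<>(), tblSchema);
  }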
  private static Properties addCols(Properties schema, List<FieldSchema> cols) {
    StringBuilder colNameBuf = new StringBuilder();
    StringBuilder colTypeBuf = new StringBuilder();
    StringBuilder colComment = new StringBuilder();

    boolean first = true;
    String columnNameDelimiter = getColumnNameDelimiter(cols);
    for (FieldSchema col : cols) {
      if (!first) {
        colNameBuf.append(columnNameDelimiter);
        colTypeBuf.append(":");
        colComment.append('\0');
      }
      colNameBuf.append(col.getName());
      colTypeBuf.append(col.getType());
      colComment.append((null != col.getComment()) ? col.getComment() : StringUtils.EMPTY);
      first = false;
    }
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS,
        colNameBuf.toString());
    schema.setProperty(ColumnType.COLUMN_NAME_DELIMITER, columnNameDelimiter);
    String colTypes = colTypeBuf.toString();
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES,
        colTypes);
    schema.setProperty("columns.comments", colComment.toString());

    return schema;
  }

  public static Properties getSchemaWithoutCols(StorageDescriptor sd,
      Map<String, String> parameters, String databaseName, String tableName,
      List<FieldSchema> partitionKeys) {
    Properties schema = new Properties();
    String inputFormat = sd.getInputFormat();
    if (inputFormat == null || inputFormat.length() == 0) {
      inputFormat = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName();
    }
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT,
        inputFormat);
    String outputFormat = sd.getOutputFormat();
    if (outputFormat == null || outputFormat.length() == 0) {
      outputFormat = org.apache.hadoop.mapred.SequenceFileOutputFormat.class.getName();
    }
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_OUTPUT_FORMAT,
        outputFormat);

    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
        databaseName + "." + tableName);

    if (sd.getLocation() != null) {
      schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION,
          sd.getLocation());
    }
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT,
        Integer.toString(sd.getNumBuckets()));
    if (sd.getBucketCols() != null && sd.getBucketCols().size() > 0) {
      schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_FIELD_NAME,
          Joiner.on(",").join(sd.getBucketCols()));
    }
    if (sd.getSerdeInfo() != null) {
      for (Map.Entry<String, String> param : sd.getSerdeInfo().getParameters().entrySet()) {
        schema.put(param.getKey(), (param.getValue() != null) ? param.getValue() : StringUtils.EMPTY);
      }

      if (sd.getSerdeInfo().getSerializationLib() != null) {
        schema.setProperty(ColumnType.SERIALIZATION_LIB, sd.getSerdeInfo().getSerializationLib());
      }
    }

    if (sd.getCols() != null) {
      schema.setProperty(ColumnType.SERIALIZATION_DDL, getDDLFromFieldSchema(tableName, sd.getCols()));
    }

    String partString = StringUtils.EMPTY;
    String partStringSep = StringUtils.EMPTY;
    String partTypesString = StringUtils.EMPTY;
    String partTypesStringSep = StringUtils.EMPTY;
    for (FieldSchema partKey : partitionKeys) {
      partString = partString.concat(partStringSep);
      partString = partString.concat(partKey.getName());
      partTypesString = partTypesString.concat(partTypesStringSep);
      partTypesString = partTypesString.concat(partKey.getType());
      if (partStringSep.length() == 0) {
        partStringSep = "/";
        partTypesStringSep = ":";
      }
    }
    if (partString.length() > 0) {
      schema.setProperty(
          org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS,
          partString);
      schema.setProperty(
          org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES,
          partTypesString);
    }

    if (parameters != null) {
      for (Map.Entry<String, String> e : parameters.entrySet()) {
        // Add non-null parameters to the schema.
        if (e.getValue() != null) {
          schema.setProperty(e.getKey(), e.getValue());
        }
      }
    }

    return schema;
  }
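  // --- Illustrative usage sketch (not part of the upstream class) ---
  // addCols flattens a column list into the "columns", "columns.types" and
  // "columns.comments" properties. The method below is hypothetical.
  private static Properties exampleAddCols() {
    List<FieldSchema> cols = Arrays.asList(
        new FieldSchema("id", "bigint", "row id"),
        new FieldSchema("name", "string", null));
    // Expect "columns" = "id,name" and "columns.types" = "bigint:string",
    // with comments separated by '\0'.
    return addCols(new Properties(), cols);
  }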
  public static Properties getSchema(org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
      org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd, Map<String, String> parameters,
      String databaseName, String tableName, List<FieldSchema> partitionKeys) {
    return addCols(getSchemaWithoutCols(sd, parameters, databaseName, tableName, partitionKeys),
        tblsd.getCols());
  }

  public static String getColumnNameDelimiter(List<FieldSchema> fieldSchemas) {
    // First check whether any field name contains a comma.
    for (int i = 0; i < fieldSchemas.size(); i++) {
      if (fieldSchemas.get(i).getName().contains(",")) {
        return String.valueOf(ColumnType.COLUMN_COMMENTS_DELIMITER);
      }
    }
    return String.valueOf(',');
  }

  /**
   * Convert FieldSchemas to columnNames.
   */
  public static String getColumnNamesFromFieldSchema(List<FieldSchema> fieldSchemas) {
    String delimiter = getColumnNameDelimiter(fieldSchemas);
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fieldSchemas.size(); i++) {
      if (i > 0) {
        sb.append(delimiter);
      }
      sb.append(fieldSchemas.get(i).getName());
    }
    return sb.toString();
  }
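  // --- Illustrative usage sketch (not part of the upstream class) ---
  // When a column name itself contains a comma, getColumnNameDelimiter falls
  // back to the column-comments delimiter so names can still be split safely.
  // The method below is hypothetical.
  private static String exampleColumnNameDelimiter() {
    List<FieldSchema> cols = Arrays.asList(
        new FieldSchema("a,b", "string", null),
        new FieldSchema("c", "int", null));
    return getColumnNameDelimiter(cols); // not "," because "a,b" contains a comma
  }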
  /**
   * Convert FieldSchemas to columnTypes.
   */
  public static String getColumnTypesFromFieldSchema(List<FieldSchema> fieldSchemas) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fieldSchemas.size(); i++) {
      if (i > 0) {
        sb.append(",");
      }
      sb.append(fieldSchemas.get(i).getType());
    }
    return sb.toString();
  }

  public static String getColumnCommentsFromFieldSchema(List<FieldSchema> fieldSchemas) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fieldSchemas.size(); i++) {
      if (i > 0) {
        sb.append(ColumnType.COLUMN_COMMENTS_DELIMITER);
      }
      sb.append(fieldSchemas.get(i).getComment());
    }
    return sb.toString();
  }

  public static boolean isMaterializedViewTable(Table table) {
    if (table == null) {
      return false;
    }
    return TableType.MATERIALIZED_VIEW.toString().equals(table.getTableType());
  }

  public static List<String> getColumnNames(List<FieldSchema> schema) {
    List<String> cols = new ArrayList<>(schema.size());
    for (FieldSchema fs : schema) {
      cols.add(fs.getName());
    }
    return cols;
  }

  public static boolean isValidSchedulingPolicy(String str) {
    try {
      parseSchedulingPolicy(str);
      return true;
    } catch (IllegalArgumentException ex) {
    }
    return false;
  }

  public static WMPoolSchedulingPolicy parseSchedulingPolicy(String schedulingPolicy) {
    if (schedulingPolicy == null) {
      return WMPoolSchedulingPolicy.FAIR;
    }
    schedulingPolicy = schedulingPolicy.trim().toUpperCase();
    if ("DEFAULT".equals(schedulingPolicy)) {
      return WMPoolSchedulingPolicy.FAIR;
    }
    return Enum.valueOf(WMPoolSchedulingPolicy.class, schedulingPolicy);
  }

  private static boolean hasCatalogName(String dbName) {
    return dbName != null && dbName.length() > 0
        && dbName.charAt(0) == CATALOG_DB_THRIFT_NAME_MARKER;
  }

  /**
   * Given a catalog name and database name, cram them together into one string. This method can
   * be used if you do not know the catalog name, in which case the default catalog will be
   * retrieved from the conf object. The resulting string can be parsed apart again via
   * {@link #parseDbName(String, Configuration)}.
   * @param catalogName catalog name, can be null if not known.
   * @param dbName database name, can be null or empty.
   * @param conf configuration object, used to determine the default catalog if catalogName is null
   * @return one string that contains both.
   */
  public static String prependCatalogToDbName(@Nullable String catalogName, @Nullable String dbName,
      Configuration conf) {
    if (catalogName == null) catalogName = getDefaultCatalog(conf);
    StringBuilder buf = new StringBuilder()
        .append(CATALOG_DB_THRIFT_NAME_MARKER)
        .append(catalogName)
        .append(CATALOG_DB_SEPARATOR);
    if (dbName != null) {
      if (dbName.isEmpty()) buf.append(DB_EMPTY_MARKER);
      else buf.append(dbName);
    }
    return buf.toString();
  }

  /**
   * Given a catalog name and database name, cram them together into one string. These can be
   * parsed apart again via {@link #parseDbName(String, Configuration)}.
   * @param catalogName catalog name. This cannot be null. If this might be null use
   *                    {@link #prependCatalogToDbName(String, String, Configuration)} instead.
   * @param dbName database name.
   * @return one string that contains both.
   */
  public static String prependNotNullCatToDbName(String catalogName, String dbName) {
    assert catalogName != null;
    return prependCatalogToDbName(catalogName, dbName, null);
  }
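  // --- Illustrative usage sketch (not part of the upstream class) ---
  // parseSchedulingPolicy treats null and "default" (any case) as FAIR; anything
  // else must be a WMPoolSchedulingPolicy enum name, otherwise an
  // IllegalArgumentException is thrown, which isValidSchedulingPolicy maps to false.
  // The method below is hypothetical.
  private static WMPoolSchedulingPolicy exampleSchedulingPolicy() {
    return parseSchedulingPolicy("default"); // -> WMPoolSchedulingPolicy.FAIR
  }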
  /**
   * Prepend the default 'hive' catalog onto the database name.
   * @param dbName database name
   * @param conf configuration object, used to determine the default catalog
   * @return one string with the 'hive' catalog name prepended.
   */
  public static String prependCatalogToDbName(String dbName, Configuration conf) {
    return prependCatalogToDbName(null, dbName, conf);
  }

  private final static String[] nullCatalogAndDatabase = { null, null };

  /**
   * Parse the catalog name out of the database name. If no catalog name is present then the
   * default catalog (as set in the configuration file) will be assumed.
   * @param dbName name of the database. This may or may not contain the catalog name.
   * @param conf configuration object, used to determine the default catalog if it is not present
   *             in the database name.
   * @return an array of two elements, the first being the catalog name, the second the database
   *         name.
   * @throws MetaException if the name is not either just a database name or a catalog plus
   *                       database name with the proper delimiters.
   */
  public static String[] parseDbName(String dbName, Configuration conf) throws MetaException {
    if (dbName == null) return nullCatalogAndDatabase;
    if (hasCatalogName(dbName)) {
      if (dbName.endsWith(CATALOG_DB_SEPARATOR)) {
        // This means the DB name is null
        return new String[] { dbName.substring(1, dbName.length() - 1), null };
      } else if (dbName.endsWith(DB_EMPTY_MARKER)) {
        // This means the DB name is empty
        return new String[] { dbName.substring(1, dbName.length() - DB_EMPTY_MARKER.length() - 1), "" };
      }
      String[] names = dbName.substring(1).split(CATALOG_DB_SEPARATOR, 2);
      if (names.length != 2) {
        throw new MetaException(dbName + " is prepended with the catalog marker but does not "
            + "appear to have a catalog name in it");
      }
      return names;
    } else {
      return new String[] { getDefaultCatalog(conf), dbName };
    }
  }

  /**
   * Position in the array returned by {@link #parseDbName} that has the catalog name.
   */
  public static final int CAT_NAME = 0;
  /**
   * Position in the array returned by {@link #parseDbName} that has the database name.
   */
  public static final int DB_NAME = 1;

  public static String getDefaultCatalog(Configuration conf) {
    if (conf == null) {
      LOG.warn("Configuration is null, so going with default catalog.");
      return Warehouse.DEFAULT_CATALOG_NAME;
    }
    String catName = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CATALOG_DEFAULT);
    if (catName == null || "".equals(catName)) catName = Warehouse.DEFAULT_CATALOG_NAME;
    return catName;
  }

  public static boolean isView(Table table) {
    if (table == null) {
      return false;
    }
    return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType());
  }

  /**
   * Filters a given map with the predicate provided. All entries of the map whose key matches
   * the predicate will be removed. Expects the map to be modifiable and operates on the actual
   * map, so it does not return a copy of the filtered map.
   * @param map A map of String key-value pairs
   * @param predicate Predicate with pattern to filter the map
   */
  public static <T> void filterMapKeys(Map<String, T> map, Predicate<String> predicate) {
    if (map == null) {
      return;
    }
    map.entrySet().removeIf(entry -> predicate.test(entry.getKey()));
  }
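  // --- Illustrative usage sketch (not part of the upstream class) ---
  // prependCatalogToDbName and parseDbName round-trip a catalog-qualified
  // database name. The method below is hypothetical.
  private static String[] exampleCatalogDbRoundTrip(Configuration conf) throws MetaException {
    String combined = prependCatalogToDbName("hive", "sales", conf); // "@hive#sales"
    return parseDbName(combined, conf); // [CAT_NAME] = "hive", [DB_NAME] = "sales"
  }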
  /**
   * Filters a given map with a list of predicates. All entries of the map whose key matches
   * any predicate will be removed. Expects the map to be modifiable and operates on the actual
   * map, so it does not return a copy of the filtered map.
   * @param map A map of String key-value pairs
   * @param predicates List of predicates with patterns to filter the map
   */
  public static <T> void filterMapkeys(Map<String, T> map, List<Predicate<String>> predicates) {
    if (map == null) {
      return;
    }
    filterMapKeys(map, predicates.stream().reduce(Predicate::or).orElse(x -> false));
  }

  /**
   * Compile a list of regex patterns and collect them as Predicates.
   * @param patterns List of regex patterns to be compiled
   * @return a List of Predicates created by compiling the regex patterns
   */
  public static List<Predicate<String>> compilePatternsToPredicates(List<String> patterns) {
    return patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList());
  }
}
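A minimal, self-contained usage sketch (illustrative only, not part of the upstream code) showing how compilePatternsToPredicates and filterMapkeys can be combined to strip matching keys from a parameter map; the class name, keys and pattern below are made up for the example:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;

import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;

public class FilterMapKeysExample {
  public static void main(String[] args) {
    Map<String, String> params = new HashMap<>();
    params.put("impala.lastComputeStatsTime", "123");
    params.put("comment", "fact table");

    // Any key starting with "impala." is removed in place.
    List<Predicate<String>> predicates =
        MetaStoreUtils.compilePatternsToPredicates(Arrays.asList("^impala\\."));
    MetaStoreUtils.filterMapkeys(params, predicates);

    System.out.println(params); // {comment=fact table}
  }
}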