Java tutorial: Apache Impala's HdfsTable catalog class (org.apache.impala.catalog.HdfsTable)
// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package org.apache.impala.catalog; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.impala.analysis.ColumnDef; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.NullLiteral; import org.apache.impala.analysis.NumericLiteral; import org.apache.impala.analysis.PartitionKeyValue; import org.apache.impala.catalog.HdfsPartition.BlockReplica; import org.apache.impala.catalog.HdfsPartition.FileBlock; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; import org.apache.impala.common.FileSystemUtil; import org.apache.impala.common.Pair; import org.apache.impala.common.PrintUtils; import org.apache.impala.service.BackendConfig; import org.apache.impala.thrift.ImpalaInternalServiceConstants; import org.apache.impala.thrift.TAccessLevel; import org.apache.impala.thrift.TCatalogObjectType; import org.apache.impala.thrift.TColumn; import org.apache.impala.thrift.THdfsFileBlock; import org.apache.impala.thrift.THdfsPartition; import org.apache.impala.thrift.THdfsTable; import org.apache.impala.thrift.TNetworkAddress; import org.apache.impala.thrift.TPartitionKeyValue; import org.apache.impala.thrift.TResultRow; import org.apache.impala.thrift.TResultSet; import org.apache.impala.thrift.TResultSetMetadata; import org.apache.impala.thrift.TTable; import org.apache.impala.thrift.TTableDescriptor; import org.apache.impala.thrift.TTableType; import org.apache.impala.util.AvroSchemaConverter; import org.apache.impala.util.AvroSchemaParser; import org.apache.impala.util.AvroSchemaUtils; import org.apache.impala.util.FsPermissionChecker; import org.apache.impala.util.HdfsCachingUtil; import org.apache.impala.util.ListMap; import org.apache.impala.util.MetaStoreUtil; import org.apache.impala.util.TAccessLevelUtil; import org.apache.impala.util.TResultRowBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; /** * Internal representation of table-related metadata of a file-resident table on a * Hadoop filesystem. The table data can be accessed through libHDFS (which is more of * an abstraction over Hadoop's FileSystem class rather than DFS specifically). A * partitioned table can even span multiple filesystems. * * This class is not thread-safe. Clients of this class need to protect against * concurrent updates using external locking (see CatalogOpExecutor class). * * Owned by Catalog instance. * The partition keys constitute the clustering columns. * */ public class HdfsTable extends Table { // hive's default value for table property 'serialization.null.format' private static final String DEFAULT_NULL_COLUMN_VALUE = "\\N"; // Name of default partition for unpartitioned tables private static final String DEFAULT_PARTITION_NAME = ""; // Number of times to retry fetching the partitions from the HMS should an error occur. private final static int NUM_PARTITION_FETCH_RETRIES = 5; // Table property key for skip.header.line.count public static final String TBL_PROP_SKIP_HEADER_LINE_COUNT = "skip.header.line.count"; // An invalid network address, which will always be treated as remote. private final static TNetworkAddress REMOTE_NETWORK_ADDRESS = new TNetworkAddress("remote*addr", 0); // Minimum block size in bytes allowed for synthetic file blocks (other than the last // block, which may be shorter). private final static long MIN_SYNTHETIC_BLOCK_SIZE = 1024 * 1024; // string to indicate NULL. set in load() from table properties private String nullColumnValue_; // hive uses this string for NULL partition keys. Set in load(). private String nullPartitionKeyValue_; // Avro schema of this table if this is an Avro table, otherwise null. Set in load(). private String avroSchema_ = null; // Set to true if any of the partitions have Avro data. private boolean hasAvroData_ = false; // True if this table's metadata is marked as cached. Does not necessarily mean the // data is cached or that all/any partitions are cached. private boolean isMarkedCached_ = false; // Array of sorted maps storing the association between partition values and // partition ids. There is one sorted map per partition key. // TODO: We should not populate this for HdfsTable objects stored in the catalog // server. private ArrayList<TreeMap<LiteralExpr, HashSet<Long>>> partitionValuesMap_ = Lists.newArrayList(); // Array of partition id sets that correspond to partitions with null values // in the partition keys; one set per partition key. private ArrayList<HashSet<Long>> nullPartitionIds_ = Lists.newArrayList(); // Map of partition ids to HdfsPartitions. private HashMap<Long, HdfsPartition> partitionMap_ = Maps.newHashMap(); // Map of partition name to HdfsPartition object. Used for speeding up // table metadata loading. private HashMap<String, HdfsPartition> nameToPartitionMap_ = Maps.newHashMap(); // Store all the partition ids of an HdfsTable. 
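// Illustrative sketch (not part of the original class; 'table' and 'monthValue' are hypothetical).
// The maps above act as a per-clustering-column inverted index from partition value to partition
// ids, which callers can intersect for partition pruning. For a table with a single clustering
// column and an already-analyzed LiteralExpr 'monthValue', a lookup could look roughly like this:
//
//   Set<Long> candidateIds = Sets.newHashSet(table.getPartitionIds());
//   HashSet<Long> idsForMonth = table.getPartitionValueMap(0).get(monthValue);
//   candidateIds.retainAll(idsForMonth == null ? Sets.<Long>newHashSet() : idsForMonth);
//   // candidateIds now holds only partitions whose first key equals 'monthValue'.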
private HashSet<Long> partitionIds_ = Sets.newHashSet(); // Estimate (in bytes) of the incremental stats size per column per partition public static final long STATS_SIZE_PER_COLUMN_BYTES = 400; // Bi-directional map between an integer index and a unique datanode // TNetworkAddresses, each of which contains blocks of 1 or more // files in this table. The network addresses are stored using IP // address as the host name. Each FileBlock specifies a list of // indices within this hostIndex_ to specify which nodes contain // replicas of the block. private final ListMap<TNetworkAddress> hostIndex_ = new ListMap<TNetworkAddress>(); private HdfsPartitionLocationCompressor partitionLocationCompressor_; // Total number of Hdfs files in this table. Set in load(). private long numHdfsFiles_; // Sum of sizes of all Hdfs files in this table. Set in load(). private long totalHdfsBytes_; // True iff the table's partitions are located on more than one filesystem. private boolean multipleFileSystems_ = false; // Base Hdfs directory where files of this table are stored. // For unpartitioned tables it is simply the path where all files live. // For partitioned tables it is the root directory // under which partition dirs are placed. protected String hdfsBaseDir_; // List of FieldSchemas that correspond to the non-partition columns. Used when // describing this table and its partitions to the HMS (e.g. as part of an alter table // operation), when only non-partition columns are required. private final List<FieldSchema> nonPartFieldSchemas_ = Lists.newArrayList(); // Flag to check if the table schema has been loaded. Used as a precondition // for setAvroSchema(). private boolean isSchemaLoaded_ = false; private final static Logger LOG = LoggerFactory.getLogger(HdfsTable.class); // Caching this configuration object makes calls to getFileSystem much quicker // (saves ~50ms on a standard plan) // TODO(henry): confirm that this is thread safe - cursory inspection of the class // and its usage in getFileSystem suggests it should be. private static final Configuration CONF = new Configuration(); public HdfsTable(org.apache.hadoop.hive.metastore.api.Table msTbl, Db db, String name, String owner) { super(msTbl, db, name, owner); partitionLocationCompressor_ = new HdfsPartitionLocationCompressor(numClusteringCols_); } /** * Returns true if the table resides at a location which supports caching (e.g. HDFS). */ public boolean isLocationCacheable() { return FileSystemUtil.isPathCacheable(new Path(getLocation())); } /** * Returns true if the table and all its partitions reside at locations which * support caching (e.g. HDFS). */ public boolean isCacheable() { if (!isLocationCacheable()) return false; if (!isMarkedCached() && numClusteringCols_ > 0) { for (HdfsPartition partition : getPartitions()) { if (partition.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) { continue; } if (!partition.isCacheable()) { return false; } } } return true; } /** * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose * location is under the given 'dirPath'. It involves the following steps: * - Clear the current block metadata of the partitions. * - Call FileSystem.listStatus() on 'dirPath' to fetch the BlockLocations for each * file under it recursively. * - For every valid data file, map it to a partition from 'partsByPath' (if one exists) * and enumerate all its blocks and their corresponding hosts and disk IDs. 
* Requires that 'dirPath' and all paths in 'partsByPath' have consistent qualification * (either fully qualified or unqualified), for isDescendantPath(). * TODO: Split this method into more logical methods for cleaner code. */ private void loadBlockMetadata(Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) { try { FileSystem fs = dirPath.getFileSystem(CONF); // No need to load blocks for empty partitions list. if (partsByPath.size() == 0 || !fs.exists(dirPath)) return; if (LOG.isTraceEnabled()) { LOG.trace("Loading block md for " + name_ + " directory " + dirPath.toString()); } // Clear the state of partitions under dirPath since they are going to be updated // based on the current snapshot of files in the directory. List<HdfsPartition> dirPathPartitions = partsByPath.get(dirPath); if (dirPathPartitions != null) { // The dirPath is a partition directory. This means the path is the root of an // unpartitioned table, or the path of at least one partition. for (HdfsPartition partition : dirPathPartitions) { partition.setFileDescriptors(new ArrayList<FileDescriptor>()); } } else { // The dirPath is not a partition directory. We expect it to be an ancestor of // partition paths (e.g., the table root). Clear all partitions whose paths are // a descendant of dirPath. for (Map.Entry<Path, List<HdfsPartition>> entry : partsByPath.entrySet()) { Path partDir = entry.getKey(); if (!FileSystemUtil.isDescendantPath(partDir, dirPath)) continue; for (HdfsPartition partition : entry.getValue()) { partition.setFileDescriptors(new ArrayList<FileDescriptor>()); } } } // For file systems that do not support BlockLocation API, we manually synthesize // block location metadata based on file formats. if (!FileSystemUtil.supportsStorageIds(fs)) { synthesizeBlockMetadata(fs, dirPath, partsByPath); return; } int unknownDiskIdCount = 0; RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true); while (fileStatusIter.hasNext()) { LocatedFileStatus fileStatus = fileStatusIter.next(); if (!FileSystemUtil.isValidDataFile(fileStatus)) continue; // Find the partition that this file belongs (if any). Path partPathDir = fileStatus.getPath().getParent(); Preconditions.checkNotNull(partPathDir); List<HdfsPartition> partitions = partsByPath.get(partPathDir); // Skip if this file does not belong to any known partition. if (partitions == null) { if (LOG.isTraceEnabled()) { LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond " + " to a known partition. Skipping metadata load for this file."); } continue; } String fileName = fileStatus.getPath().getName(); FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime()); BlockLocation[] locations = fileStatus.getBlockLocations(); unknownDiskIdCount += setFdBlockMetadata(fd, locations); if (LOG.isTraceEnabled()) { LOG.trace("Adding file md dir: " + partPathDir.toString() + " file: " + fileName); } // Update the partitions' metadata that this file belongs to. 
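// Illustrative sketch (not part of this method; 'dir' is a hypothetical path). The recursive
// listFiles() call used above returns the FileStatus and its BlockLocations together in a single
// round trip, which is why the initial load prefers it over listStatus() plus per-file
// getFileBlockLocations():
//
//   FileSystem fs = dir.getFileSystem(new Configuration());
//   RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true /*recursive*/);
//   while (it.hasNext()) {
//     LocatedFileStatus status = it.next();
//     BlockLocation[] blocks = status.getBlockLocations();  // already populated by listFiles()
//   }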
for (HdfsPartition partition : partitions) { partition.getFileDescriptors().add(fd); numHdfsFiles_++; totalHdfsBytes_ += fd.getFileLength(); } } if (unknownDiskIdCount > 0) { if (LOG.isWarnEnabled()) { LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount); } } } catch (IOException e) { throw new RuntimeException( "Error loading block metadata for directory " + dirPath.toString() + ": " + e.getMessage(), e); } } /** * Sets the block metadata for FileDescriptor 'fd' using block location metadata * from 'locations'. */ private int setFdBlockMetadata(FileDescriptor fd, BlockLocation[] locations) throws IOException { int unknownFdDiskIds = 0; for (BlockLocation loc : locations) { Set<String> cachedHosts = Sets.newHashSet(loc.getCachedHosts()); // Enumerate all replicas of the block, adding any unknown hosts // to hostIndex_. We pick the network address from getNames() and // map it to the corresponding hostname from getHosts(). List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(loc.getNames().length); for (int i = 0; i < loc.getNames().length; ++i) { TNetworkAddress networkAddress = BlockReplica.parseLocation(loc.getNames()[i]); replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress), cachedHosts.contains(loc.getHosts()[i]))); } FileBlock currentBlock = new FileBlock(loc.getOffset(), loc.getLength(), replicas); THdfsFileBlock tHdfsFileBlock = currentBlock.toThrift(); fd.addThriftFileBlock(tHdfsFileBlock); unknownFdDiskIds += loadDiskIds(loc, tHdfsFileBlock); } return unknownFdDiskIds; } /** * Loads the disk IDs for BlockLocation 'location' and its corresponding file block. * HDFS API for BlockLocation returns a storageID UUID string for each disk * hosting the block, which is then mapped to a 0-based integer id called disk ID. * Returns the number of unknown disk IDs encountered in this process. */ private int loadDiskIds(BlockLocation location, THdfsFileBlock fileBlock) { int unknownDiskIdCount = 0; String[] storageIds = location.getStorageIds(); String[] hosts; try { hosts = location.getHosts(); } catch (IOException e) { LOG.error("Couldn't get hosts for block: " + location.toString(), e); return unknownDiskIdCount; } if (storageIds.length != hosts.length) { LOG.error("Number of storage IDs and number of hosts for block: " + location.toString() + " mismatch. Skipping disk ID loading for this block."); return unknownDiskIdCount; } int[] diskIDs = new int[storageIds.length]; for (int i = 0; i < storageIds.length; ++i) { if (Strings.isNullOrEmpty(storageIds[i])) { diskIDs[i] = -1; ++unknownDiskIdCount; } else { diskIDs[i] = DiskIdMapper.INSTANCE.getDiskId(hosts[i], storageIds[i]); } } FileBlock.setDiskIds(diskIDs, fileBlock); return unknownDiskIdCount; } /** * Synthesize the block metadata for a given HdfsPartition object. Should only * be called for FileSystems that do not support storage IDs. */ private void synthesizeBlockMetadata(FileSystem fs, HdfsPartition partition) throws IOException { Preconditions.checkState(!FileSystemUtil.supportsStorageIds(fs)); HashMap<Path, List<HdfsPartition>> partsByPath = Maps.newHashMap(); Path partitionPath = partition.getLocationPath(); partition.setFileDescriptors(new ArrayList<FileDescriptor>()); partsByPath.put(partitionPath, Lists.newArrayList(partition)); synthesizeBlockMetadata(fs, partitionPath, partsByPath); } /** * For filesystems that don't support BlockLocation API, synthesize file blocks * by manually splitting the file range into fixed-size blocks. 
That way, scan * ranges can be derived from file blocks as usual. All synthesized blocks are given * an invalid network address so that the scheduler will treat them as remote. */ private void synthesizeBlockMetadata(FileSystem fs, Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) throws IOException { RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true); while (fileStatusIter.hasNext()) { LocatedFileStatus fileStatus = fileStatusIter.next(); if (!FileSystemUtil.isValidDataFile(fileStatus)) continue; Path partPathDir = fileStatus.getPath().getParent(); Preconditions.checkNotNull(partPathDir); List<HdfsPartition> partitions = partsByPath.get(partPathDir); // Skip if this file does not belong to any known partition. if (partitions == null) { if (LOG.isTraceEnabled()) { LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond " + " to a known partition. Skipping metadata load for this file."); } continue; } String fileName = fileStatus.getPath().getName(); FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime()); Preconditions.checkState(partitions.size() > 0); // For the purpose of synthesizing block metadata, we assume that all partitions // with the same location have the same file format. HdfsFileFormat fileFormat = partitions.get(0).getFileFormat(); synthesizeFdBlockMetadata(fs, fd, fileFormat); // Update the partitions' metadata that this file belongs to. for (HdfsPartition partition : partitions) { partition.getFileDescriptors().add(fd); numHdfsFiles_++; totalHdfsBytes_ += fd.getFileLength(); } } } /** * Helper method to synthesize block metadata for file descriptor fd. */ private void synthesizeFdBlockMetadata(FileSystem fs, FileDescriptor fd, HdfsFileFormat fileFormat) { long start = 0; long remaining = fd.getFileLength(); // Workaround HADOOP-11584 by using the filesystem default block size rather than // the block size from the FileStatus. // TODO: after HADOOP-11584 is resolved, get the block size from the FileStatus. long blockSize = fs.getDefaultBlockSize(); if (blockSize < MIN_SYNTHETIC_BLOCK_SIZE) blockSize = MIN_SYNTHETIC_BLOCK_SIZE; if (!fileFormat.isSplittable(HdfsCompression.fromFileName(fd.getFileName()))) { blockSize = remaining; } while (remaining > 0) { long len = Math.min(remaining, blockSize); List<BlockReplica> replicas = Lists .newArrayList(new BlockReplica(hostIndex_.getIndex(REMOTE_NETWORK_ADDRESS), false)); fd.addFileBlock(new FileBlock(start, len, replicas)); remaining -= len; start += len; } } @Override public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.TABLE; } public boolean isMarkedCached() { return isMarkedCached_; } public Collection<HdfsPartition> getPartitions() { return partitionMap_.values(); } public Map<Long, HdfsPartition> getPartitionMap() { return partitionMap_; } public Set<Long> getNullPartitionIds(int i) { return nullPartitionIds_.get(i); } public HdfsPartitionLocationCompressor getPartitionLocationCompressor() { return partitionLocationCompressor_; } public Set<Long> getPartitionIds() { return partitionIds_; } public TreeMap<LiteralExpr, HashSet<Long>> getPartitionValueMap(int i) { return partitionValuesMap_.get(i); } /** * Returns the value Hive is configured to use for NULL partition key values. * Set during load. */ public String getNullPartitionKeyValue() { return nullPartitionKeyValue_; } /* * Returns the storage location (HDFS path) of this table. 
*/ public String getLocation() { return super.getMetaStoreTable().getSd().getLocation(); } List<FieldSchema> getNonPartitionFieldSchemas() { return nonPartFieldSchemas_; } // True if Impala has HDFS write permissions on the hdfsBaseDir (for an unpartitioned // table) or if Impala has write permissions on all partition directories (for // a partitioned table). public boolean hasWriteAccess() { return TAccessLevelUtil.impliesWriteAccess(accessLevel_); } /** * Returns the first location (HDFS path) that Impala does not have WRITE access * to, or null if none is found. For an unpartitioned table, this just * checks the hdfsBaseDir. For a partitioned table it checks all partition directories. */ public String getFirstLocationWithoutWriteAccess() { if (getMetaStoreTable() == null) return null; if (getMetaStoreTable().getPartitionKeysSize() == 0) { if (!TAccessLevelUtil.impliesWriteAccess(accessLevel_)) { return hdfsBaseDir_; } } else { for (HdfsPartition partition : partitionMap_.values()) { if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) { return partition.getLocation(); } } } return null; } /** * Gets the HdfsPartition matching the given partition spec. Returns null if no match * was found. */ public HdfsPartition getPartition(List<PartitionKeyValue> partitionSpec) { List<TPartitionKeyValue> partitionKeyValues = Lists.newArrayList(); for (PartitionKeyValue kv : partitionSpec) { String value = PartitionKeyValue.getPartitionKeyValueString(kv.getLiteralValue(), getNullPartitionKeyValue()); partitionKeyValues.add(new TPartitionKeyValue(kv.getColName(), value)); } return getPartitionFromThriftPartitionSpec(partitionKeyValues); } /** * Gets the HdfsPartition matching the Thrift version of the partition spec. * Returns null if no match was found. */ public HdfsPartition getPartitionFromThriftPartitionSpec(List<TPartitionKeyValue> partitionSpec) { // First, build a list of the partition values to search for in the same order they // are defined in the table. List<String> targetValues = Lists.newArrayList(); Set<String> keys = Sets.newHashSet(); for (FieldSchema fs : getMetaStoreTable().getPartitionKeys()) { for (TPartitionKeyValue kv : partitionSpec) { if (fs.getName().toLowerCase().equals(kv.getName().toLowerCase())) { targetValues.add(kv.getValue()); // Same key was specified twice if (!keys.add(kv.getName().toLowerCase())) { return null; } } } } // Make sure the number of values matches up and that some values were found. if (targetValues.size() == 0 || (targetValues.size() != getMetaStoreTable().getPartitionKeysSize())) { return null; } // Search through all the partitions and check if their partition key values // match the values being searched for. for (HdfsPartition partition : partitionMap_.values()) { if (partition.isDefaultPartition()) continue; List<LiteralExpr> partitionValues = partition.getPartitionValues(); Preconditions.checkState(partitionValues.size() == targetValues.size()); boolean matchFound = true; for (int i = 0; i < targetValues.size(); ++i) { String value; if (partitionValues.get(i) instanceof NullLiteral) { value = getNullPartitionKeyValue(); } else { value = partitionValues.get(i).getStringValue(); Preconditions.checkNotNull(value); // See IMPALA-252: we deliberately map empty strings onto // NULL when they're in partition columns. This is for // backwards compatibility with Hive, and is clearly broken.
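// Illustrative sketch (hypothetical names, not part of this method). A caller resolving the
// partition (year=2017, month=3) of a table partitioned by (year, month) would build the Thrift
// spec and look it up like this:
//
//   List<TPartitionKeyValue> spec = Lists.newArrayList(
//       new TPartitionKeyValue("year", "2017"),
//       new TPartitionKeyValue("month", "3"));
//   HdfsPartition match = table.getPartitionFromThriftPartitionSpec(spec);
//   // 'match' is null if the spec does not identify exactly one existing partition.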
if (value.isEmpty()) value = getNullPartitionKeyValue(); } if (!targetValues.get(i).equals(value)) { matchFound = false; break; } } if (matchFound) { return partition; } } return null; } /** * Gets hdfs partitions by the given partition set. */ public List<HdfsPartition> getPartitionsFromPartitionSet(List<List<TPartitionKeyValue>> partitionSet) { List<HdfsPartition> partitions = Lists.newArrayList(); for (List<TPartitionKeyValue> kv : partitionSet) { HdfsPartition partition = getPartitionFromThriftPartitionSpec(kv); if (partition != null) partitions.add(partition); } return partitions; } /** * Create columns corresponding to fieldSchemas. Throws a TableLoadingException if the * metadata is incompatible with what we support. */ private void addColumnsFromFieldSchemas(List<FieldSchema> fieldSchemas) throws TableLoadingException { int pos = colsByPos_.size(); for (FieldSchema s : fieldSchemas) { Type type = parseColumnType(s); // Check if we support partitioning on columns of such a type. if (pos < numClusteringCols_ && !type.supportsTablePartitioning()) { throw new TableLoadingException(String.format( "Failed to load metadata for table '%s' because of " + "unsupported partition-column type '%s' in partition column '%s'", getFullName(), type.toString(), s.getName())); } Column col = new Column(s.getName(), type, s.getComment(), pos); addColumn(col); ++pos; } } /** * Clear the partitions of an HdfsTable and the associated metadata. */ private void resetPartitions() { partitionIds_.clear(); partitionMap_.clear(); nameToPartitionMap_.clear(); partitionValuesMap_.clear(); nullPartitionIds_.clear(); // Initialize partitionValuesMap_ and nullPartitionIds_. Also reset column stats. for (int i = 0; i < numClusteringCols_; ++i) { getColumns().get(i).getStats().setNumNulls(0); getColumns().get(i).getStats().setNumDistinctValues(0); partitionValuesMap_.add(Maps.<LiteralExpr, HashSet<Long>>newTreeMap()); nullPartitionIds_.add(Sets.<Long>newHashSet()); } numHdfsFiles_ = 0; totalHdfsBytes_ = 0; } /** * Resets any partition metadata, creates the default partition and sets the base * table directory path as well as the caching info from the HMS table. */ private void initializePartitionMetadata(org.apache.hadoop.hive.metastore.api.Table msTbl) throws CatalogException { Preconditions.checkNotNull(msTbl); resetPartitions(); hdfsBaseDir_ = msTbl.getSd().getLocation(); // INSERT statements need to refer to this if they try to write to new partitions // Scans don't refer to this because by definition all partitions they refer to // exist. addDefaultPartition(msTbl.getSd()); // We silently ignore cache directives that no longer exist in HDFS, and remove // non-existing cache directives from the parameters. isMarkedCached_ = HdfsCachingUtil.validateCacheParams(msTbl.getParameters()); } /** * Create HdfsPartition objects corresponding to 'msPartitions' and add them to this * table's partition list. Any partition metadata will be reset and loaded from * scratch. For each partition created, we load the block metadata for each data file * under it. We optimize the block metadata loading by grouping together the name node * requests for all the partitions under the table base directory into a single RPC. * * If there are no partitions in the Hive metadata, a single partition is added with no * partition keys. 
*/ private void loadAllPartitions(List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions, org.apache.hadoop.hive.metastore.api.Table msTbl) throws IOException, CatalogException { Preconditions.checkNotNull(msTbl); initializePartitionMetadata(msTbl); // Map of partition paths to their corresponding HdfsPartition objects. Populated // using createPartition() calls. A single partition path can correspond to multiple // partitions. HashMap<Path, List<HdfsPartition>> partsByPath = Maps.newHashMap(); // Qualify to ensure isDescendantPath() works correctly. Path tblLocation = FileSystemUtil.createFullyQualifiedPath(getHdfsBaseDirPath()); // List of directories that we scan for block locations. We optimize the block metadata // loading to reduce the number of RPCs to the NN by separately loading partitions // with default directory paths (under the base table directory) and non-default // directory paths. For the former we issue a single RPC to the NN to load all the // blocks from hdfsBaseDir_ and for the latter we load each of the partition directory // separately. // TODO: We can still do some advanced optimization by grouping all the partition // directories under the same ancestor path up the tree. Set<Path> dirsToLoad = Sets.newHashSet(tblLocation); if (msTbl.getPartitionKeysSize() == 0) { Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty()); // This table has no partition key, which means it has no declared partitions. // We model partitions slightly differently to Hive - every file must exist in a // partition, so add a single partition with no keys which will get all the // files in the table's root directory. HdfsPartition part = createPartition(msTbl.getSd(), null); partsByPath.put(tblLocation, Lists.newArrayList(part)); if (isMarkedCached_) part.markCached(); addPartition(part); FileSystem fs = tblLocation.getFileSystem(CONF); if (fs.exists(tblLocation)) { accessLevel_ = getAvailableAccessLevel(fs, tblLocation); } } else { for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) { HdfsPartition partition = createPartition(msPartition.getSd(), msPartition); addPartition(partition); // If the partition is null, its HDFS path does not exist, and it was not added // to this table's partition list. Skip the partition. if (partition == null) continue; if (msPartition.getParameters() != null) { partition.setNumRows(getRowCount(msPartition.getParameters())); } if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) { // TODO: READ_ONLY isn't exactly correct because the it's possible the // partition does not have READ permissions either. When we start checking // whether we can READ from a table, this should be updated to set the // table's access level to the "lowest" effective level across all // partitions. That is, if one partition has READ_ONLY and another has // WRITE_ONLY the table's access level should be NONE. accessLevel_ = TAccessLevel.READ_ONLY; } // Qualify to ensure isDescendantPath() works correctly. Path partDir = FileSystemUtil.createFullyQualifiedPath(new Path(msPartition.getSd().getLocation())); List<HdfsPartition> parts = partsByPath.get(partDir); if (parts == null) { partsByPath.put(partDir, Lists.newArrayList(partition)); } else { parts.add(partition); } if (!dirsToLoad.contains(partDir) && !FileSystemUtil.isDescendantPath(partDir, tblLocation)) { // This partition has a custom filesystem location. Load its file/block // metadata separately by adding it to the list of dirs to load. 
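// Illustrative sketch (hypothetical paths, not part of this method). Both sides of the
// isDescendantPath() check above must carry the same qualification, which is why the table root
// and every partition directory are run through createFullyQualifiedPath() first:
//
//   Path tblLocation = FileSystemUtil.createFullyQualifiedPath(new Path("/warehouse/sales"));
//   Path partDir = FileSystemUtil.createFullyQualifiedPath(new Path("/warehouse/sales/year=2017"));
//   FileSystemUtil.isDescendantPath(partDir, tblLocation);  // true: covered by the table root scan
//   // A partition located at, say, s3a://bucket/archive/year=2015 would fail the check and be
//   // added to 'dirsToLoad' as a separate root to scan.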
dirsToLoad.add(partDir); } } } loadMetadataAndDiskIds(dirsToLoad, partsByPath); } /** * Refreshes block metadata information for 'partition'. This method is optimized for * the case where the files in the partition have not changed dramatically. It first * uses a listStatus() call on the partition directory to detect files with changed * mtime and fetches their block locations using the getFileBlockLocations() method. * Our benchmarks suggest that the listStatus() call is much faster than the listFiles() * (up to ~40x faster in some cases). The initial table load still uses the listFiles() * on the data directory, which fetches both the FileStatus and the BlockLocations in * a single call. */ private void refreshFileMetadata(HdfsPartition partition) throws CatalogException { Path partDir = partition.getLocationPath(); Preconditions.checkNotNull(partDir); try { FileSystem fs = partDir.getFileSystem(CONF); if (!fs.exists(partDir)) { partition.setFileDescriptors(new ArrayList<FileDescriptor>()); return; } if (!FileSystemUtil.supportsStorageIds(fs)) { synthesizeBlockMetadata(fs, partition); return; } // Index the partition file descriptors by their file names for O(1) lookups. ImmutableMap<String, FileDescriptor> fileDescsByName = Maps.uniqueIndex(partition.getFileDescriptors(), new Function<FileDescriptor, String>() { public String apply(FileDescriptor desc) { return desc.getFileName(); } }); // Iterate through the current snapshot of the partition directory listing to // figure out files that were newly added/modified. List<FileDescriptor> newFileDescs = Lists.newArrayList(); long newPartSizeBytes = 0; for (FileStatus fileStatus : fs.listStatus(partDir)) { if (!FileSystemUtil.isValidDataFile(fileStatus)) continue; String fileName = fileStatus.getPath().getName().toString(); FileDescriptor fd = fileDescsByName.get(fileName); if (fd == null || partition.isMarkedCached() || fd.getFileLength() != fileStatus.getLen() || fd.getModificationTime() != fileStatus.getModificationTime()) { fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime()); setFdBlockMetadata(fd, fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen())); } newFileDescs.add(fd); newPartSizeBytes += fileStatus.getLen(); } partition.setFileDescriptors(newFileDescs); numHdfsFiles_ += newFileDescs.size(); totalHdfsBytes_ += newPartSizeBytes; } catch (IOException e) { throw new CatalogException("Error loading block metadata for partition " + partition.toString(), e); } } /** * Helper method to load the block locations from each directory in 'locations' * and filter only the paths from 'partsByPath'. Also loads the disk IDs * corresponding to these block locations. */ private void loadMetadataAndDiskIds(Set<Path> locations, HashMap<Path, List<HdfsPartition>> partsByPath) { LOG.info(String.format("Loading file and block metadata for %s partitions from %s paths: %s", partsByPath.size(), locations.size(), getFullName())); for (Path location : locations) loadBlockMetadata(location, partsByPath); LOG.info(String.format("Loaded file and block metadata for %s partitions from %s paths: %s", partsByPath.size(), locations.size(), getFullName())); } /** * Gets the AccessLevel that is available for Impala for this table based on the * permissions Impala has on the given path. If the path does not exist, recurses up * the path until an existing parent directory is found, and inherits access permissions * from it. * Always returns READ_WRITE for S3 files.
*/ private TAccessLevel getAvailableAccessLevel(FileSystem fs, Path location) throws IOException { // Avoid calling getPermissions() on file path for S3 files, as that makes a round // trip to S3. Also, the S3A connector is currently unable to manage S3 permissions, // so for now it is safe to assume that all files(objects) have READ_WRITE // permissions, as that's what the S3A connector will always return too. // TODO: Revisit if the S3A connector is updated to be able to manage S3 object // permissions. (see HADOOP-13892) if (FileSystemUtil.isS3AFileSystem(fs)) return TAccessLevel.READ_WRITE; FsPermissionChecker permissionChecker = FsPermissionChecker.getInstance(); while (location != null) { if (fs.exists(location)) { FsPermissionChecker.Permissions perms = permissionChecker.getPermissions(fs, location); if (perms.canReadAndWrite()) { return TAccessLevel.READ_WRITE; } else if (perms.canRead()) { return TAccessLevel.READ_ONLY; } else if (perms.canWrite()) { return TAccessLevel.WRITE_ONLY; } return TAccessLevel.NONE; } location = location.getParent(); } // Should never get here. Preconditions.checkNotNull(location, "Error: no path ancestor exists"); return TAccessLevel.NONE; } /** * Creates a new HdfsPartition object to be added to HdfsTable's partition list. * Partitions may be empty, or may not even exist in the filesystem (a partition's * location may have been changed to a new path that is about to be created by an * INSERT). Also loads the block metadata for this partition. Returns new partition * if successful or null if none was created. * * Throws CatalogException if the supplied storage descriptor contains metadata that * Impala can't understand. */ public HdfsPartition createAndLoadPartition(StorageDescriptor storageDescriptor, org.apache.hadoop.hive.metastore.api.Partition msPartition) throws CatalogException { HdfsPartition hdfsPartition = createPartition(storageDescriptor, msPartition); refreshFileMetadata(hdfsPartition); return hdfsPartition; } /** * Creates a new HdfsPartition from a specified StorageDescriptor and an HMS partition * object. */ private HdfsPartition createPartition(StorageDescriptor storageDescriptor, org.apache.hadoop.hive.metastore.api.Partition msPartition) throws CatalogException { HdfsStorageDescriptor fileFormatDescriptor = HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor); List<LiteralExpr> keyValues = Lists.newArrayList(); if (msPartition != null) { // Load key values for (String partitionKey : msPartition.getValues()) { Type type = getColumns().get(keyValues.size()).getType(); // Deal with Hive's special NULL partition key. 
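// Illustrative sketch (hypothetical values, not part of this method). For a table partitioned by
// a single INT key, an HMS partition value of "2017" goes through LiteralExpr.create("2017", type),
// while Hive's NULL sentinel (nullPartitionKeyValue_, typically "__HIVE_DEFAULT_PARTITION__") is
// handled by the branch below and becomes NullLiteral.create(type):
//
//   msPartition.getValues() == ["2017"]                       -> [LiteralExpr for 2017]
//   msPartition.getValues() == ["__HIVE_DEFAULT_PARTITION__"] -> [NullLiteral of the key's type]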
if (partitionKey.equals(nullPartitionKeyValue_)) { keyValues.add(NullLiteral.create(type)); } else { try { keyValues.add(LiteralExpr.create(partitionKey, type)); } catch (Exception ex) { LOG.warn("Failed to create literal expression of type: " + type, ex); throw new CatalogException("Invalid partition key value of type: " + type, ex); } } } for (Expr v : keyValues) v.analyzeNoThrow(null); } Path partDirPath = new Path(storageDescriptor.getLocation()); try { FileSystem fs = partDirPath.getFileSystem(CONF); multipleFileSystems_ = multipleFileSystems_ || !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs); if (msPartition != null) { HdfsCachingUtil.validateCacheParams(msPartition.getParameters()); } HdfsPartition partition = new HdfsPartition(this, msPartition, keyValues, fileFormatDescriptor, new ArrayList<FileDescriptor>(), getAvailableAccessLevel(fs, partDirPath)); partition.checkWellFormed(); return partition; } catch (IOException e) { throw new CatalogException("Error initializing partition", e); } } /** * Adds the partition to the HdfsTable. Throws a CatalogException if the partition * already exists in this table. */ public void addPartition(HdfsPartition partition) throws CatalogException { if (partitionMap_.containsKey(partition.getId())) { throw new CatalogException(String.format("Partition %s already exists in table %s", partition.getPartitionName(), getFullName())); } if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true; partitionMap_.put(partition.getId(), partition); totalHdfsBytes_ += partition.getSize(); numHdfsFiles_ += partition.getNumFileDescriptors(); updatePartitionMdAndColStats(partition); } /** * Updates the HdfsTable's partition metadata, i.e. adds the id to the HdfsTable and * populates structures used for speeding up partition pruning/lookup. Also updates * column stats. */ private void updatePartitionMdAndColStats(HdfsPartition partition) { if (partition.getPartitionValues().size() != numClusteringCols_) return; partitionIds_.add(partition.getId()); for (int i = 0; i < partition.getPartitionValues().size(); ++i) { ColumnStats stats = getColumns().get(i).getStats(); LiteralExpr literal = partition.getPartitionValues().get(i); // Store partitions with null partition values separately if (literal instanceof NullLiteral) { stats.setNumNulls(stats.getNumNulls() + 1); if (nullPartitionIds_.get(i).isEmpty()) { stats.setNumDistinctValues(stats.getNumDistinctValues() + 1); } nullPartitionIds_.get(i).add(partition.getId()); continue; } HashSet<Long> partitionIds = partitionValuesMap_.get(i).get(literal); if (partitionIds == null) { partitionIds = Sets.newHashSet(); partitionValuesMap_.get(i).put(literal, partitionIds); stats.setNumDistinctValues(stats.getNumDistinctValues() + 1); } partitionIds.add(partition.getId()); } nameToPartitionMap_.put(partition.getPartitionName(), partition); } /** * Drops the partition having the given partition spec from HdfsTable. Cleans up its * metadata from all the mappings used to speed up partition pruning/lookup. * Also updates partition column statistics. Given partitionSpec must match exactly * one partition. * Returns the HdfsPartition that was dropped. If the partition does not exist, returns * null. */ public HdfsPartition dropPartition(List<TPartitionKeyValue> partitionSpec) { return dropPartition(getPartitionFromThriftPartitionSpec(partitionSpec)); } /** * Drops a partition and updates partition column statistics. Returns the * HdfsPartition that was dropped or null if the partition does not exist. 
*/ private HdfsPartition dropPartition(HdfsPartition partition) { if (partition == null) return null; totalHdfsBytes_ -= partition.getSize(); numHdfsFiles_ -= partition.getNumFileDescriptors(); Preconditions.checkArgument(partition.getPartitionValues().size() == numClusteringCols_); Long partitionId = partition.getId(); // Remove the partition id from the list of partition ids and other mappings. partitionIds_.remove(partitionId); partitionMap_.remove(partitionId); nameToPartitionMap_.remove(partition.getPartitionName()); for (int i = 0; i < partition.getPartitionValues().size(); ++i) { ColumnStats stats = getColumns().get(i).getStats(); LiteralExpr literal = partition.getPartitionValues().get(i); // Check if this is a null literal. if (literal instanceof NullLiteral) { nullPartitionIds_.get(i).remove(partitionId); stats.setNumNulls(stats.getNumNulls() - 1); if (nullPartitionIds_.get(i).isEmpty()) { stats.setNumDistinctValues(stats.getNumDistinctValues() - 1); } continue; } HashSet<Long> partitionIds = partitionValuesMap_.get(i).get(literal); // If there are multiple partition ids corresponding to a literal, remove // only this id. Otherwise, remove the <literal, id> pair. if (partitionIds.size() > 1) partitionIds.remove(partitionId); else { partitionValuesMap_.get(i).remove(literal); stats.setNumDistinctValues(stats.getNumDistinctValues() - 1); } } return partition; } /** * Drops the given partitions from this table. Cleans up its metadata from all the * mappings used to speed up partition pruning/lookup. Also updates partitions column * statistics. Returns the list of partitions that were dropped. */ public List<HdfsPartition> dropPartitions(List<HdfsPartition> partitions) { ArrayList<HdfsPartition> droppedPartitions = Lists.newArrayList(); for (HdfsPartition partition : partitions) { HdfsPartition hdfsPartition = dropPartition(partition); if (hdfsPartition != null) droppedPartitions.add(hdfsPartition); } return droppedPartitions; } /** * Adds or replaces the default partition. */ public void addDefaultPartition(StorageDescriptor storageDescriptor) throws CatalogException { // Default partition has no files and is not referred to by scan nodes. Data sinks // refer to this to understand how to create new partitions. HdfsStorageDescriptor hdfsStorageDescriptor = HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor); HdfsPartition partition = HdfsPartition.defaultPartition(this, hdfsStorageDescriptor); partitionMap_.put(partition.getId(), partition); } @Override public void load(boolean reuseMetadata, IMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException { load(reuseMetadata, client, msTbl, true, true, null); } /** * Loads table metadata from the Hive Metastore. * * If 'reuseMetadata' is false, performs a full metadata load from the Hive Metastore, * including partition and file metadata. Otherwise, loads metadata incrementally and * updates this HdfsTable in place so that it is in sync with the Hive Metastore. * * Depending on the operation that triggered the table metadata load, not all the * metadata may need to be updated. If 'partitionsToUpdate' is not null, it specifies a * list of partitions for which metadata should be updated. Otherwise, all partition * metadata will be updated from the Hive Metastore. * * If 'loadFileMetadata' is true, file metadata of the specified partitions are * reloaded from scratch. If 'partitionsToUpdate' is not specified, file metadata of all * the partitions are loaded. 
* * If 'loadTableSchema' is true, the table schema is loaded from the Hive Metastore. * * There are several cases where existing file descriptors might be reused incorrectly: * 1. an ALTER TABLE ADD PARTITION or dynamic partition insert is executed through * Hive. This does not update the lastDdlTime. * 2. Hdfs rebalancer is executed. This changes the block locations but doesn't update * the mtime (file modification time). * If any of these occur, user has to execute "invalidate metadata" to invalidate the * metadata cache of the table and trigger a fresh load. */ public void load(boolean reuseMetadata, IMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl, boolean loadFileMetadata, boolean loadTableSchema, Set<String> partitionsToUpdate) throws TableLoadingException { // turn all exceptions into TableLoadingException msTable_ = msTbl; try { if (loadTableSchema) loadSchema(client, msTbl); if (reuseMetadata && getCatalogVersion() == Catalog.INITIAL_CATALOG_VERSION) { // This is the special case of CTAS that creates a 'temp' table that does not // actually exist in the Hive Metastore. initializePartitionMetadata(msTbl); updateStatsFromHmsTable(msTbl); return; } // Load partition and file metadata if (reuseMetadata) { // Incrementally update this table's partitions and file metadata LOG.info("Incrementally loading table metadata for: " + getFullName()); Preconditions.checkState(partitionsToUpdate == null || loadFileMetadata); updateMdFromHmsTable(msTbl); if (msTbl.getPartitionKeysSize() == 0) { if (loadFileMetadata) updateUnpartitionedTableFileMd(); } else { updatePartitionsFromHms(client, partitionsToUpdate, loadFileMetadata); } LOG.info("Incrementally loaded table metadata for: " + getFullName()); } else { // Load all partitions from Hive Metastore, including file metadata. LOG.info("Fetching partition metadata from the Metastore: " + getFullName()); List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions = MetaStoreUtil .fetchAllPartitions(client, db_.getName(), name_, NUM_PARTITION_FETCH_RETRIES); LOG.info("Fetched partition metadata from the Metastore: " + getFullName()); loadAllPartitions(msPartitions, msTbl); } if (loadTableSchema) setAvroSchema(client, msTbl); updateStatsFromHmsTable(msTbl); } catch (TableLoadingException e) { throw e; } catch (Exception e) { throw new TableLoadingException("Failed to load metadata for table: " + name_, e); } } /** * Updates the table metadata, including 'hdfsBaseDir_', 'isMarkedCached_', * and 'accessLevel_' from 'msTbl'. Throws an IOException if there was an error * accessing the table location path. */ private void updateMdFromHmsTable(org.apache.hadoop.hive.metastore.api.Table msTbl) throws IOException { Preconditions.checkNotNull(msTbl); hdfsBaseDir_ = msTbl.getSd().getLocation(); isMarkedCached_ = HdfsCachingUtil.validateCacheParams(msTbl.getParameters()); if (msTbl.getPartitionKeysSize() == 0) { Path location = new Path(hdfsBaseDir_); FileSystem fs = location.getFileSystem(CONF); if (fs.exists(location)) { accessLevel_ = getAvailableAccessLevel(fs, location); } } setMetaStoreTable(msTbl); } /** * Updates the file metadata of an unpartitioned HdfsTable. 
*/ private void updateUnpartitionedTableFileMd() throws CatalogException { if (LOG.isTraceEnabled()) { LOG.trace("update unpartitioned table: " + name_); } resetPartitions(); org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(); Preconditions.checkNotNull(msTbl); addDefaultPartition(msTbl.getSd()); HdfsPartition part = createPartition(msTbl.getSd(), null); addPartition(part); if (isMarkedCached_) part.markCached(); refreshFileMetadata(part); } /** * Updates the partitions of an HdfsTable so that they are in sync with the Hive * Metastore. It reloads partitions that were marked 'dirty' by doing a DROP + CREATE. * It removes from this table partitions that no longer exist in the Hive Metastore and * adds partitions that were added externally (e.g. using Hive) to the Hive Metastore * but do not exist in this table. If 'loadFileMetadata' is true, it triggers * file/block metadata reload for the partitions specified in 'partitionsToUpdate', if * any, or for all the table partitions if 'partitionsToUpdate' is null. */ private void updatePartitionsFromHms(IMetaStoreClient client, Set<String> partitionsToUpdate, boolean loadFileMetadata) throws Exception { if (LOG.isTraceEnabled()) LOG.trace("Sync table partitions: " + name_); org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(); Preconditions.checkNotNull(msTbl); Preconditions.checkState(msTbl.getPartitionKeysSize() != 0); Preconditions.checkState(loadFileMetadata || partitionsToUpdate == null); // Retrieve all the partition names from the Hive Metastore. We need this to // identify the delta between partitions of the local HdfsTable and the table entry // in the Hive Metastore. Note: This is a relatively "cheap" operation // (~.3 secs for 30K partitions). Set<String> msPartitionNames = Sets.newHashSet(); msPartitionNames.addAll(client.listPartitionNames(db_.getName(), name_, (short) -1)); // Names of loaded partitions in this table Set<String> partitionNames = Sets.newHashSet(); // Partitions for which file metadata must be loaded List<HdfsPartition> partitionsToUpdateFileMd = Lists.newArrayList(); // Partitions that need to be dropped and recreated from scratch List<HdfsPartition> dirtyPartitions = Lists.newArrayList(); // Partitions that need to be removed from this table. That includes dirty // partitions as well as partitions that were removed from the Hive Metastore. List<HdfsPartition> partitionsToRemove = Lists.newArrayList(); // Identify dirty partitions that need to be loaded from the Hive Metastore and // partitions that no longer exist in the Hive Metastore. for (HdfsPartition partition : partitionMap_.values()) { // Ignore the default partition if (partition.isDefaultPartition()) continue; // Remove partitions that don't exist in the Hive Metastore. These are partitions // that were removed from HMS using some external process, e.g. Hive. if (!msPartitionNames.contains(partition.getPartitionName())) { partitionsToRemove.add(partition); } if (partition.isDirty()) { // Dirty partitions are updated by removing them from table's partition // list and loading them from the Hive Metastore. 
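// Illustrative summary (not part of this method). The reconciliation below boils down to two set
// differences plus the dirty reloads, comparing the loaded partition names against the names
// currently in the Hive Metastore:
//
//   partitionsToRemove ~ loaded names not in msPartitionNames                   (dropped externally)
//   newPartitionsInHms = Sets.difference(msPartitionNames, partitionNames)      (added externally)
//   dirtyPartitions    = loaded partitions flagged dirty (dropped, then reloaded from the HMS)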
dirtyPartitions.add(partition); } else { if (partitionsToUpdate == null && loadFileMetadata) { partitionsToUpdateFileMd.add(partition); } } Preconditions.checkNotNull(partition.getCachedMsPartitionDescriptor()); partitionNames.add(partition.getPartitionName()); } partitionsToRemove.addAll(dirtyPartitions); dropPartitions(partitionsToRemove); // Load dirty partitions from Hive Metastore loadPartitionsFromMetastore(dirtyPartitions, client); // Identify and load partitions that were added in the Hive Metastore but don't // exist in this table. Set<String> newPartitionsInHms = Sets.difference(msPartitionNames, partitionNames); loadPartitionsFromMetastore(newPartitionsInHms, client); // If a list of modified partitions (old and new) is specified, don't reload file // metadata for the new ones as they have already been detected in HMS and have been // reloaded by loadPartitionsFromMetastore(). if (partitionsToUpdate != null) { partitionsToUpdate.removeAll(newPartitionsInHms); } // Load file metadata. Until we have a notification mechanism for when a // file changes in hdfs, it is sometimes required to reload all the file // descriptors and block metadata of a table (e.g. REFRESH statement). if (loadFileMetadata) { if (partitionsToUpdate != null) { // Only reload file metadata of partitions specified in 'partitionsToUpdate' Preconditions.checkState(partitionsToUpdateFileMd.isEmpty()); partitionsToUpdateFileMd = getPartitionsByName(partitionsToUpdate); } loadPartitionFileMetadata(partitionsToUpdateFileMd); } } /** * Returns the HdfsPartition objects associated with the specified list of partition * names. */ private List<HdfsPartition> getPartitionsByName(Collection<String> partitionNames) { List<HdfsPartition> partitions = Lists.newArrayList(); for (String partitionName : partitionNames) { String partName = DEFAULT_PARTITION_NAME; if (partitionName.length() > 0) { // Trim the last trailing char '/' from each partition name partName = partitionName.substring(0, partitionName.length() - 1); } Preconditions.checkState(nameToPartitionMap_.containsKey(partName), "Invalid partition name: " + partName); partitions.add(nameToPartitionMap_.get(partName)); } return partitions; } /** * Updates the cardinality of this table from an HMS table. Sets the cardinalities of * dummy/default partitions for the case of unpartitioned tables. */ private void updateStatsFromHmsTable(org.apache.hadoop.hive.metastore.api.Table msTbl) { numRows_ = getRowCount(msTbl.getParameters()); // For unpartitioned tables set the numRows in its partitions // to the table's numRows. if (numClusteringCols_ == 0 && !partitionMap_.isEmpty()) { // Unpartitioned tables have a 'dummy' partition and a default partition. // Temp tables used in CTAS statements have one partition. Preconditions.checkState(partitionMap_.size() == 2 || partitionMap_.size() == 1); for (HdfsPartition p : partitionMap_.values()) { p.setNumRows(numRows_); } } } /** * Returns whether the table has the 'skip.header.line.count' property set. */ private boolean hasSkipHeaderLineCount() { String key = TBL_PROP_SKIP_HEADER_LINE_COUNT; org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(); if (msTbl == null) return false; String inputFormat = msTbl.getSd().getInputFormat(); return msTbl.getParameters().containsKey(key); } /** * Parses and returns the value of the 'skip.header.line.count' table property. If the * value is not set for the table, returns 0. If parsing fails or a value < 0 is found, * the error parameter is updated to contain an error message. 
*/ public int parseSkipHeaderLineCount(StringBuilder error) { if (!hasSkipHeaderLineCount()) return 0; return parseSkipHeaderLineCount(getMetaStoreTable().getParameters(), error); } /** * Parses and returns the value of the 'skip.header.line.count' table property. The * caller must ensure that the property is contained in the 'tblProperties' map. If * parsing fails or a value < 0 is found, the error parameter is updated to contain an * error message. */ public static int parseSkipHeaderLineCount(Map<String, String> tblProperties, StringBuilder error) { Preconditions.checkState(tblProperties != null); String key = TBL_PROP_SKIP_HEADER_LINE_COUNT; Preconditions.checkState(tblProperties.containsKey(key)); // Try to parse. String string_value = tblProperties.get(key); int skipHeaderLineCount = 0; String error_msg = String.format( "Invalid value for table property %s: %s (value " + "must be an integer >= 0)", key, string_value); try { skipHeaderLineCount = Integer.parseInt(string_value); } catch (NumberFormatException exc) { error.append(error_msg); } if (skipHeaderLineCount < 0) error.append(error_msg); return skipHeaderLineCount; } /** * Sets avroSchema_ if the table or any of the partitions in the table are stored * as Avro. This method also reconciles the schema if the column * definitions from the metastore differ from the Avro schema. */ private void setAvroSchema(IMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws Exception { Preconditions.checkState(isSchemaLoaded_); String inputFormat = msTbl.getSd().getInputFormat(); if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO || hasAvroData_) { // Look for Avro schema in TBLPROPERTIES and in SERDEPROPERTIES, with the latter // taking precedence. List<Map<String, String>> schemaSearchLocations = Lists.newArrayList(); schemaSearchLocations.add(getMetaStoreTable().getSd().getSerdeInfo().getParameters()); schemaSearchLocations.add(getMetaStoreTable().getParameters()); avroSchema_ = AvroSchemaUtils.getAvroSchema(schemaSearchLocations); if (avroSchema_ == null) { // No Avro schema was explicitly set in the table metadata, so infer the Avro // schema from the column definitions. Schema inferredSchema = AvroSchemaConverter.convertFieldSchemas(msTbl.getSd().getCols(), getFullName()); avroSchema_ = inferredSchema.toString(); } String serdeLib = msTbl.getSd().getSerdeInfo().getSerializationLib(); if (serdeLib == null || serdeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) { // If the SerDe library is null or set to LazySimpleSerDe, it // indicates there is an issue with the table metadata since Avro tables need a // non-native serde. Instead of failing to load the table, fall back to // using the fields from the storage descriptor (same as Hive). return; } else { // Generate new FieldSchemas from the Avro schema. This step reconciles // differences in the column definitions and the Avro schema. For // Impala-created tables this step is not necessary because the same // resolution is done during table creation. But Hive-created tables // store the original column definitions, and not the reconciled ones.
List<ColumnDef> colDefs = ColumnDef.createFromFieldSchemas(msTbl.getSd().getCols()); List<ColumnDef> avroCols = AvroSchemaParser.parse(avroSchema_); StringBuilder warning = new StringBuilder(); List<ColumnDef> reconciledColDefs = AvroSchemaUtils.reconcileSchemas(colDefs, avroCols, warning); if (warning.length() != 0) { LOG.warn(String.format("Warning while loading table %s:\n%s", getFullName(), warning.toString())); } AvroSchemaUtils.setFromSerdeComment(reconciledColDefs); // Reset and update nonPartFieldSchemas_ to the reconciled colDefs. nonPartFieldSchemas_.clear(); nonPartFieldSchemas_.addAll(ColumnDef.toFieldSchemas(reconciledColDefs)); // Update the columns as per the reconciled colDefs and re-load stats. clearColumns(); addColumnsFromFieldSchemas(msTbl.getPartitionKeys()); addColumnsFromFieldSchemas(nonPartFieldSchemas_); loadAllColumnStats(client); } } } /** * Loads table schema and column stats from Hive Metastore. */ private void loadSchema(IMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws Exception { nonPartFieldSchemas_.clear(); // set nullPartitionKeyValue from the hive conf. nullPartitionKeyValue_ = client.getConfigValue("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__"); // set NULL indicator string from table properties nullColumnValue_ = msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT); if (nullColumnValue_ == null) nullColumnValue_ = DEFAULT_NULL_COLUMN_VALUE; // Excludes partition columns. nonPartFieldSchemas_.addAll(msTbl.getSd().getCols()); // The number of clustering columns is the number of partition keys. numClusteringCols_ = msTbl.getPartitionKeys().size(); partitionLocationCompressor_.setClusteringColumns(numClusteringCols_); clearColumns(); // Add all columns to the table. Ordering is important: partition columns first, // then all other columns. addColumnsFromFieldSchemas(msTbl.getPartitionKeys()); addColumnsFromFieldSchemas(nonPartFieldSchemas_); loadAllColumnStats(client); isSchemaLoaded_ = true; } /** * Loads partitions from the Hive Metastore and adds them to the internal list of * table partitions. */ private void loadPartitionsFromMetastore(List<HdfsPartition> partitions, IMetaStoreClient client) throws Exception { Preconditions.checkNotNull(partitions); if (partitions.isEmpty()) return; if (LOG.isTraceEnabled()) { LOG.trace(String.format("Incrementally updating %d/%d partitions.", partitions.size(), partitionMap_.size())); } Set<String> partitionNames = Sets.newHashSet(); for (HdfsPartition part : partitions) { partitionNames.add(part.getPartitionName()); } loadPartitionsFromMetastore(partitionNames, client); } /** * Loads from the Hive Metastore the partitions that correspond to the specified * 'partitionNames' and adds them to the internal list of table partitions. */ private void loadPartitionsFromMetastore(Set<String> partitionNames, IMetaStoreClient client) throws Exception { Preconditions.checkNotNull(partitionNames); if (partitionNames.isEmpty()) return; // Load partition metadata from Hive Metastore.
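// Illustrative sketch (hypothetical database/table names, not part of this method). HMS partition
// names use the "key1=val1/key2=val2" form, so the batched fetch below could be exercised on its
// own roughly like this:
//
//   List<Partition> parts = MetaStoreUtil.fetchPartitionsByName(
//       client, Lists.newArrayList("year=2017/month=3", "year=2017/month=4"),
//       "mydb", "sales");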
    List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions =
        Lists.newArrayList();
    msPartitions.addAll(MetaStoreUtil.fetchPartitionsByName(
        client, Lists.newArrayList(partitionNames), db_.getName(), name_));
    for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) {
      HdfsPartition partition = createPartition(msPartition.getSd(), msPartition);
      addPartition(partition);
      // If the partition is null, its HDFS path does not exist, and it was not added
      // to this table's partition list. Skip the partition.
      if (partition == null) continue;
      if (msPartition.getParameters() != null) {
        partition.setNumRows(getRowCount(msPartition.getParameters()));
      }
      if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
        // TODO: READ_ONLY isn't exactly correct because it's possible the
        // partition does not have READ permissions either. When we start checking
        // whether we can READ from a table, this should be updated to set the
        // table's access level to the "lowest" effective level across all
        // partitions. That is, if one partition has READ_ONLY and another has
        // WRITE_ONLY the table's access level should be NONE.
        accessLevel_ = TAccessLevel.READ_ONLY;
      }
      refreshFileMetadata(partition);
    }
  }

  /**
   * Loads the file descriptors and block metadata of a list of partitions.
   */
  private void loadPartitionFileMetadata(List<HdfsPartition> partitions)
      throws Exception {
    Preconditions.checkNotNull(partitions);
    if (LOG.isTraceEnabled()) {
      LOG.trace(String.format("loading file metadata for %d partitions",
          partitions.size()));
    }
    org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable();
    Preconditions.checkNotNull(msTbl);
    HdfsStorageDescriptor fileFormatDescriptor =
        HdfsStorageDescriptor.fromStorageDescriptor(this.name_, msTbl.getSd());
    for (HdfsPartition partition : partitions) {
      org.apache.hadoop.hive.metastore.api.Partition msPart =
          partition.toHmsPartition();
      if (msPart != null) {
        HdfsCachingUtil.validateCacheParams(msPart.getParameters());
      }
      StorageDescriptor sd = null;
      if (msPart == null) {
        // If this partition is not stored in the Hive Metastore (e.g. the default
        // partition of an unpartitioned table), use the table's storage descriptor
        // to load file metadata.
        sd = msTbl.getSd();
      } else {
        sd = msPart.getSd();
      }
      loadPartitionFileMetadata(sd, partition);
    }
  }

  /**
   * Loads the file descriptors and block metadata of a partition from its
   * StorageDescriptor. If 'partition' does not have an entry in the Hive Metastore,
   * 'storageDescriptor' is the StorageDescriptor of the associated table. Updates the
   * table's aggregate file and byte counts and refreshes the partition's file metadata.
   */
  private void loadPartitionFileMetadata(StorageDescriptor storageDescriptor,
      HdfsPartition partition) throws Exception {
    Preconditions.checkNotNull(storageDescriptor);
    Preconditions.checkNotNull(partition);
    org.apache.hadoop.hive.metastore.api.Partition msPart = partition.toHmsPartition();
    Path partDirPath = new Path(storageDescriptor.getLocation());
    FileSystem fs = partDirPath.getFileSystem(CONF);
    if (!fs.exists(partDirPath)) return;

    numHdfsFiles_ -= partition.getNumFileDescriptors();
    totalHdfsBytes_ -= partition.getSize();
    Preconditions.checkState(numHdfsFiles_ >= 0 && totalHdfsBytes_ >= 0);
    refreshFileMetadata(partition);
  }

  @Override
  protected List<String> getColumnNamesWithHmsStats() {
    List<String> ret = Lists.newArrayList();
    // Only non-partition columns have column stats in the HMS.
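    // Partition (clustering) columns occupy the first numClusteringCols_ entries of
    // getColumns(), so the subList() below skips them; only the remaining column names
    // are returned, lower-cased.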
    for (Column column : getColumns().subList(numClusteringCols_, getColumns().size())) {
      ret.add(column.getName().toLowerCase());
    }
    return ret;
  }

  @Override
  protected synchronized void loadFromThrift(TTable thriftTable)
      throws TableLoadingException {
    super.loadFromThrift(thriftTable);
    THdfsTable hdfsTable = thriftTable.getHdfs_table();
    Preconditions.checkState(hdfsTable.getPartition_prefixes() instanceof ArrayList<?>);
    partitionLocationCompressor_ = new HdfsPartitionLocationCompressor(
        numClusteringCols_, (ArrayList<String>) hdfsTable.getPartition_prefixes());
    hdfsBaseDir_ = hdfsTable.getHdfsBaseDir();
    nullColumnValue_ = hdfsTable.nullColumnValue;
    nullPartitionKeyValue_ = hdfsTable.nullPartitionKeyValue;
    multipleFileSystems_ = hdfsTable.multiple_filesystems;
    Preconditions.checkState(hdfsTable.getNetwork_addresses() instanceof ArrayList<?>);
    hostIndex_.populate((ArrayList<TNetworkAddress>) hdfsTable.getNetwork_addresses());
    resetPartitions();
    try {
      for (Map.Entry<Long, THdfsPartition> part : hdfsTable.getPartitions().entrySet()) {
        HdfsPartition hdfsPart =
            HdfsPartition.fromThrift(this, part.getKey(), part.getValue());
        addPartition(hdfsPart);
      }
    } catch (CatalogException e) {
      throw new TableLoadingException(e.getMessage());
    }
    avroSchema_ = hdfsTable.isSetAvroSchema() ? hdfsTable.getAvroSchema() : null;
    isMarkedCached_ =
        HdfsCachingUtil.validateCacheParams(getMetaStoreTable().getParameters());
  }

  @Override
  public TTableDescriptor toThriftDescriptor(int tableId,
      Set<Long> referencedPartitions) {
    // Create thrift descriptors to send to the BE. The BE does not
    // need any information below the THdfsPartition level.
    TTableDescriptor tableDesc = new TTableDescriptor(tableId, TTableType.HDFS_TABLE,
        getTColumnDescriptors(), numClusteringCols_, name_, db_.getName());
    tableDesc.setHdfsTable(getTHdfsTable(false, referencedPartitions));
    return tableDesc;
  }

  @Override
  public TTable toThrift() {
    // Send all metadata between the catalog service and the FE.
    TTable table = super.toThrift();
    table.setTable_type(TTableType.HDFS_TABLE);
    table.setHdfs_table(getTHdfsTable(true, null));
    return table;
  }

  /**
   * Creates a THdfsTable corresponding to this HdfsTable. If includeFileDesc is true,
   * then all partitions and THdfsFileDescs of each partition should be included.
   * Otherwise, don't include any THdfsFileDescs, and include only those partitions in
   * the refPartitions set (the backend doesn't need metadata for unreferenced
   * partitions). To prevent the catalog from hitting an OOM error while trying to
   * serialize large partition incremental stats, we estimate the stats size and filter
   * the incremental stats data from partition objects if the estimate exceeds
   * --inc_stats_size_limit_bytes.
   */
  private THdfsTable getTHdfsTable(boolean includeFileDesc, Set<Long> refPartitions) {
    // includeFileDesc implies all partitions should be included (refPartitions == null).
    Preconditions.checkState(!includeFileDesc || refPartitions == null);
    int numPartitions = (refPartitions == null) ?
        partitionMap_.values().size() : refPartitions.size();
    long statsSizeEstimate =
        numPartitions * getColumns().size() * STATS_SIZE_PER_COLUMN_BYTES;
    boolean includeIncrementalStats =
        (statsSizeEstimate < BackendConfig.INSTANCE.getIncStatsMaxSize());
    Map<Long, THdfsPartition> idToPartition = Maps.newHashMap();
    for (HdfsPartition partition : partitionMap_.values()) {
      long id = partition.getId();
      if (refPartitions == null || refPartitions.contains(id)) {
        idToPartition.put(id,
            partition.toThrift(includeFileDesc, includeIncrementalStats));
      }
    }
    THdfsTable hdfsTable = new THdfsTable(hdfsBaseDir_, getColumnNames(),
        nullPartitionKeyValue_, nullColumnValue_, idToPartition);
    hdfsTable.setAvroSchema(avroSchema_);
    hdfsTable.setMultiple_filesystems(multipleFileSystems_);
    if (includeFileDesc) {
      // Network addresses are used only by THdfsFileBlocks which are inside
      // THdfsFileDesc, so include network addresses only when including THdfsFileDesc.
      hdfsTable.setNetwork_addresses(hostIndex_.getList());
    }
    hdfsTable.setPartition_prefixes(partitionLocationCompressor_.getPrefixes());
    return hdfsTable;
  }

  public long getTotalHdfsBytes() { return totalHdfsBytes_; }
  public String getHdfsBaseDir() { return hdfsBaseDir_; }
  public Path getHdfsBaseDirPath() { return new Path(hdfsBaseDir_); }
  public boolean isAvroTable() { return avroSchema_ != null; }

  /**
   * Get the index of hosts that store replicas of blocks of this table.
   */
  public ListMap<TNetworkAddress> getHostIndex() { return hostIndex_; }

  /**
   * Returns the file format that the majority of partitions are stored in.
   */
  public HdfsFileFormat getMajorityFormat() {
    Map<HdfsFileFormat, Integer> numPartitionsByFormat = Maps.newHashMap();
    for (HdfsPartition partition : partitionMap_.values()) {
      HdfsFileFormat format = partition.getInputFormatDescriptor().getFileFormat();
      Integer numPartitions = numPartitionsByFormat.get(format);
      if (numPartitions == null) {
        numPartitions = Integer.valueOf(1);
      } else {
        numPartitions = Integer.valueOf(numPartitions.intValue() + 1);
      }
      numPartitionsByFormat.put(format, numPartitions);
    }

    int maxNumPartitions = Integer.MIN_VALUE;
    HdfsFileFormat majorityFormat = null;
    for (Map.Entry<HdfsFileFormat, Integer> entry : numPartitionsByFormat.entrySet()) {
      if (entry.getValue().intValue() > maxNumPartitions) {
        majorityFormat = entry.getKey();
        maxNumPartitions = entry.getValue().intValue();
      }
    }
    Preconditions.checkNotNull(majorityFormat);
    return majorityFormat;
  }

  /**
   * Returns the HDFS paths corresponding to HdfsTable partitions that don't exist in
   * the Hive Metastore. An HDFS path is represented as a list of string values, one
   * per partition key column.
   */
  public List<List<String>> getPathsWithoutPartitions() throws CatalogException {
    HashSet<List<LiteralExpr>> existingPartitions = new HashSet<List<LiteralExpr>>();
    // Get the list of partition values of existing partitions in the Hive Metastore.
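    // Example (hypothetical values, not from the original source): for partition keys
    // (c1, c2), a directory such as <base>/c1=1/c2=2 with no matching entry in
    // 'existingPartitions' is reported by getAllPartitionsNotInHms() below as the
    // recoverable partition ["1", "2"].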
    for (HdfsPartition partition : partitionMap_.values()) {
      if (partition.isDefaultPartition()) continue;
      existingPartitions.add(partition.getPartitionValues());
    }
    List<String> partitionKeys = Lists.newArrayList();
    for (int i = 0; i < numClusteringCols_; ++i) {
      partitionKeys.add(getColumns().get(i).getName());
    }
    Path basePath = new Path(hdfsBaseDir_);
    List<List<String>> partitionsNotInHms = new ArrayList<List<String>>();
    try {
      getAllPartitionsNotInHms(basePath, partitionKeys, existingPartitions,
          partitionsNotInHms);
    } catch (Exception e) {
      throw new CatalogException(String.format("Failed to recover partitions for %s " +
          "with exception:%s.", getFullName(), e));
    }
    return partitionsNotInHms;
  }

  /**
   * Collects in 'partitionsNotInHms' all partitions that match the partition key
   * directory structure, pass the type compatibility check, and are not already part
   * of the table.
   */
  private void getAllPartitionsNotInHms(Path path, List<String> partitionKeys,
      HashSet<List<LiteralExpr>> existingPartitions,
      List<List<String>> partitionsNotInHms) throws IOException {
    FileSystem fs = path.getFileSystem(CONF);
    // Check whether the base directory exists.
    if (!fs.exists(path)) return;
    List<String> partitionValues = Lists.newArrayList();
    List<LiteralExpr> partitionExprs = Lists.newArrayList();
    getAllPartitionsNotInHms(path, partitionKeys, 0, fs, partitionValues,
        partitionExprs, existingPartitions, partitionsNotInHms);
  }

  /**
   * Recursively collects all partitions that match the partition key directory
   * structure and pass the type compatibility check.
   *
   * path               e.g. c1=1/c2=2/c3=3
   * partitionKeys      The ordered partition keys, e.g. ("c1", "c2", "c3").
   * depth              The start position in partitionKeys to match the path name.
   * partitionValues    The partition values used to create a partition.
   * partitionExprs     The list of LiteralExprs used to avoid duplicate partitions.
   *                    E.g. having /c1=0001 and /c1=01, we should make sure only one
   *                    partition will be added.
   * existingPartitions All partitions which exist in the Hive Metastore or were newly
   *                    added.
   * partitionsNotInHms Contains all the recovered partitions.
   */
  private void getAllPartitionsNotInHms(Path path, List<String> partitionKeys,
      int depth, FileSystem fs, List<String> partitionValues,
      List<LiteralExpr> partitionExprs,
      HashSet<List<LiteralExpr>> existingPartitions,
      List<List<String>> partitionsNotInHms) throws IOException {
    if (depth == partitionKeys.size()) {
      if (existingPartitions.contains(partitionExprs)) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(String.format("Skip recovery of path '%s' because it already " +
              "exists in metastore", path.toString()));
        }
      } else {
        partitionsNotInHms.add(partitionValues);
        existingPartitions.add(partitionExprs);
      }
      return;
    }

    FileStatus[] statuses = fs.listStatus(path);
    for (FileStatus status : statuses) {
      if (!status.isDirectory()) continue;
      Pair<String, LiteralExpr> keyValues =
          getTypeCompatibleValue(status.getPath(), partitionKeys.get(depth));
      if (keyValues == null) continue;

      List<String> currentPartitionValues = Lists.newArrayList(partitionValues);
      List<LiteralExpr> currentPartitionExprs = Lists.newArrayList(partitionExprs);
      currentPartitionValues.add(keyValues.first);
      currentPartitionExprs.add(keyValues.second);
      getAllPartitionsNotInHms(status.getPath(), partitionKeys, depth + 1, fs,
          currentPartitionValues, currentPartitionExprs, existingPartitions,
          partitionsNotInHms);
    }
  }

  /**
   * Checks that the last component of 'path' is of the form "<partitionkey>=<v>"
   * where 'v' is a type-compatible value from the domain of the 'partitionKey' column.
   * If not, returns null; otherwise returns a Pair whose first element is the original
   * value and whose second element is the LiteralExpr created from the original value.
   */
  private Pair<String, LiteralExpr> getTypeCompatibleValue(Path path,
      String partitionKey) {
    String[] partName = path.getName().split("=");
    if (partName.length != 2 || !partName[0].equals(partitionKey)) return null;

    // Check type compatibility for the partition value.
    Column column = getColumn(partName[0]);
    Preconditions.checkNotNull(column);
    Type type = column.getType();
    LiteralExpr expr = null;
    if (!partName[1].equals(getNullPartitionKeyValue())) {
      try {
        expr = LiteralExpr.create(partName[1], type);
        // Skip values that exceed the maximum value of the specified type.
        if (expr instanceof NumericLiteral) {
          if (NumericLiteral.isOverflow(((NumericLiteral) expr).getValue(), type)) {
            LOG.warn(String.format("Skip the overflow value (%s) for Type (%s).",
                partName[1], type.toSql()));
            return null;
          }
        }
      } catch (Exception ex) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(String.format("Invalid partition value (%s) for Type (%s).",
              partName[1], type.toSql()));
        }
        return null;
      }
    } else {
      expr = new NullLiteral();
    }
    return new Pair<String, LiteralExpr>(partName[1], expr);
  }

  /**
   * Returns statistics on this table as a tabular result set. Used for the
   * SHOW TABLE STATS statement. The schema of the returned TResultSet is set
   * inside this method.
   */
  public TResultSet getTableStats() {
    TResultSet result = new TResultSet();
    TResultSetMetadata resultSchema = new TResultSetMetadata();
    result.setSchema(resultSchema);

    for (int i = 0; i < numClusteringCols_; ++i) {
      // Add the partition-key values as strings for simplicity.
      Column partCol = getColumns().get(i);
      TColumn colDesc = new TColumn(partCol.getName(), Type.STRING.toThrift());
      resultSchema.addToColumns(colDesc);
    }
    resultSchema.addToColumns(new TColumn("#Rows", Type.BIGINT.toThrift()));
    resultSchema.addToColumns(new TColumn("#Files", Type.BIGINT.toThrift()));
    resultSchema.addToColumns(new TColumn("Size", Type.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Bytes Cached", Type.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Cache Replication", Type.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Format", Type.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Incremental stats", Type.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Location", Type.STRING.toThrift()));

    // Pretty-print partitions and their stats.
    ArrayList<HdfsPartition> orderedPartitions =
        Lists.newArrayList(partitionMap_.values());
    Collections.sort(orderedPartitions);

    long totalCachedBytes = 0L;
    for (HdfsPartition p : orderedPartitions) {
      // Ignore the dummy default partition.
      if (p.isDefaultPartition()) continue;

      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      // Add the partition-key values (as strings for simplicity).
      for (LiteralExpr expr : p.getPartitionValues()) {
        rowBuilder.add(expr.getStringValue());
      }
      // Add the number of rows, files, bytes, cache stats, and file format.
      rowBuilder.add(p.getNumRows()).add(p.getFileDescriptors().size())
          .addBytes(p.getSize());
      if (!p.isMarkedCached()) {
        // Helps to differentiate partitions that have 0B cached versus partitions
        // that are not marked as cached.
        rowBuilder.add("NOT CACHED");
        rowBuilder.add("NOT CACHED");
      } else {
        // Calculate the number of bytes that are cached.
        long cachedBytes = 0L;
        for (FileDescriptor fd : p.getFileDescriptors()) {
          for (THdfsFileBlock fb : fd.getFileBlocks()) {
            if (fb.getIs_replica_cached().contains(true)) {
              cachedBytes += fb.getLength();
            }
          }
        }
        totalCachedBytes += cachedBytes;
        rowBuilder.addBytes(cachedBytes);

        // Extract the cache replication factor from the parameters of the table
        // if the table is not partitioned, or directly from the partition.
        Short rep = HdfsCachingUtil.getCachedCacheReplication(
            numClusteringCols_ == 0 ?
            p.getTable().getMetaStoreTable().getParameters() :
            p.getParameters());
        rowBuilder.add(rep.toString());
      }
      rowBuilder.add(p.getInputFormatDescriptor().getFileFormat().toString());
      rowBuilder.add(String.valueOf(p.hasIncrementalStats()));
      rowBuilder.add(p.getLocation());
      result.addToRows(rowBuilder.get());
    }

    // For partitioned tables, add a summary row at the bottom.
    if (numClusteringCols_ > 0) {
      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      int numEmptyCells = numClusteringCols_ - 1;
      rowBuilder.add("Total");
      for (int i = 0; i < numEmptyCells; ++i) {
        rowBuilder.add("");
      }

      // Total number of rows, files, and bytes (leave format empty).
      rowBuilder.add(numRows_).add(numHdfsFiles_).addBytes(totalHdfsBytes_)
          .addBytes(totalCachedBytes).add("").add("").add("").add("");
      result.addToRows(rowBuilder.get());
    }
    return result;
  }

  /**
   * Returns file info for the given dbname/tableName and partition spec. If the
   * partition spec is null, returns file info for all partitions, ordered by
   * partition.
   */
  public TResultSet getFiles(List<List<TPartitionKeyValue>> partitionSet)
      throws CatalogException {
    TResultSet result = new TResultSet();
    TResultSetMetadata resultSchema = new TResultSetMetadata();
    result.setSchema(resultSchema);
    resultSchema.addToColumns(new TColumn("Path", Type.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Size", Type.STRING.toThrift()));
    resultSchema.addToColumns(new TColumn("Partition", Type.STRING.toThrift()));
    result.setRows(Lists.<TResultRow>newArrayList());

    List<HdfsPartition> orderedPartitions;
    if (partitionSet == null) {
      orderedPartitions = Lists.newArrayList(partitionMap_.values());
    } else {
      // Get a list of HdfsPartition objects for the given partition set.
      orderedPartitions = getPartitionsFromPartitionSet(partitionSet);
    }
    Collections.sort(orderedPartitions);

    for (HdfsPartition p : orderedPartitions) {
      List<FileDescriptor> orderedFds = Lists.newArrayList(p.getFileDescriptors());
      Collections.sort(orderedFds);
      for (FileDescriptor fd : orderedFds) {
        TResultRowBuilder rowBuilder = new TResultRowBuilder();
        rowBuilder.add(p.getLocation() + "/" + fd.getFileName());
        rowBuilder.add(PrintUtils.printBytes(fd.getFileLength()));
        rowBuilder.add(p.getPartitionName());
        result.addToRows(rowBuilder.get());
      }
    }
    return result;
  }

  /**
   * Constructs a partition name from a list of TPartitionKeyValue objects.
   */
  public static String constructPartitionName(List<TPartitionKeyValue> partitionSpec) {
    List<String> partitionCols = Lists.newArrayList();
    List<String> partitionVals = Lists.newArrayList();
    for (TPartitionKeyValue kv : partitionSpec) {
      partitionCols.add(kv.getName());
      partitionVals.add(kv.getValue());
    }
    return org.apache.hadoop.hive.common.FileUtils.makePartName(partitionCols,
        partitionVals);
  }

  /**
   * Reloads the metadata of partition 'oldPartition' by removing
   * it from the table and reconstructing it from the HMS partition object
   * 'hmsPartition'. If the old partition is null, nothing is removed and the
   * partition constructed from 'hmsPartition' is simply added.
   */
  public void reloadPartition(HdfsPartition oldPartition, Partition hmsPartition)
      throws CatalogException {
    HdfsPartition refreshedPartition =
        createAndLoadPartition(hmsPartition.getSd(), hmsPartition);
    Preconditions.checkArgument(oldPartition == null ||
        oldPartition.compareTo(refreshedPartition) == 0);
    dropPartition(oldPartition);
    addPartition(refreshedPartition);
  }
}
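
// A minimal usage sketch (hypothetical, not part of the original Impala source file):
// it exercises only the static parseSkipHeaderLineCount(Map, StringBuilder) helper
// defined above, which reports problems through the StringBuilder instead of throwing.
// The class name and property values below are made up for illustration; the property
// key 'skip.header.line.count' is the one documented on the helper, and the enclosing
// class is assumed to be HdfsTable as its Javadoc indicates.
class ParseSkipHeaderLineCountExample {
  public static void main(String[] args) {
    Map<String, String> props = new HashMap<String, String>();
    StringBuilder error = new StringBuilder();

    // A valid value parses cleanly and leaves 'error' empty.
    props.put("skip.header.line.count", "2");
    int count = HdfsTable.parseSkipHeaderLineCount(props, error);
    System.out.println(count + " / '" + error + "'");  // 2 / ''

    // A negative (or non-numeric) value appends an error message to 'error'.
    props.put("skip.header.line.count", "-1");
    error.setLength(0);
    count = HdfsTable.parseSkipHeaderLineCount(props, error);
    System.out.println(count + " / '" + error + "'");  // -1 / 'Invalid value ...'
  }
}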