com.facebook.presto.hive.HivePartitionManager.java Source code

Introduction

Here is the source code for com.facebook.presto.hive.HivePartitionManager.java. HivePartitionManager is part of Presto's Hive connector: given a query's predicate, it prunes a Hive table's partitions down to those that can contain matching rows and reports which parts of the predicate were fully enforced by that pruning.

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.hive.metastore.HiveMetastore;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.predicate.Domain;
import com.facebook.presto.spi.predicate.NullableValue;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.facebook.presto.spi.predicate.ValueSet;
import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import io.airlift.slice.Slice;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.metastore.ProtectMode;
import org.apache.hadoop.hive.metastore.api.Table;
import org.joda.time.DateTimeZone;

import javax.inject.Inject;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.hive.HiveBucketing.getHiveBucket;
import static com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles;
import static com.facebook.presto.hive.HiveUtil.parsePartitionValue;
import static com.facebook.presto.hive.util.Types.checkType;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Predicates.not;
import static com.google.common.base.Strings.isNullOrEmpty;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString;

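// Computes the set of Hive partitions that can match a query's predicate
// (partition pruning) and reports which parts of the predicate were enforced.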
public class HivePartitionManager {
    public static final String PRESTO_OFFLINE = "presto_offline";
    private static final String PARTITION_VALUE_WILDCARD = "";

    private final String connectorId;
    private final DateTimeZone timeZone;
    private final boolean assumeCanonicalPartitionKeys;
    private final int domainCompactionThreshold;

    @Inject
    public HivePartitionManager(HiveConnectorId connectorId, HiveClientConfig hiveClientConfig) {
        this(connectorId, hiveClientConfig.getDateTimeZone(), hiveClientConfig.getMaxOutstandingSplits(),
                hiveClientConfig.isAssumeCanonicalPartitionKeys(), hiveClientConfig.getDomainCompactionThreshold());
    }

    public HivePartitionManager(HiveConnectorId connectorId, DateTimeZone timeZone, int maxOutstandingSplits,
            boolean assumeCanonicalPartitionKeys, int domainCompactionThreshold) {
        this.connectorId = requireNonNull(connectorId, "connectorId is null").toString();
        this.timeZone = requireNonNull(timeZone, "timeZone is null");
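        // maxOutstandingSplits is validated for sanity but not retained by this class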
        checkArgument(maxOutstandingSplits >= 1, "maxOutstandingSplits must be at least 1");
        this.assumeCanonicalPartitionKeys = assumeCanonicalPartitionKeys;
        checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
        this.domainCompactionThreshold = domainCompactionThreshold;
    }

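    // Prunes the table's partitions against the effective predicate. Returns the
    // matching partitions together with the predicate that still has to be
    // evaluated later (remaining) and the part enforced by pruning (enforced).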
    public HivePartitionResult getPartitions(ConnectorSession session, HiveMetastore metastore,
            ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> effectivePredicate) {
        HiveTableHandle hiveTableHandle = checkType(tableHandle, HiveTableHandle.class, "tableHandle");
        requireNonNull(effectivePredicate, "effectivePredicate is null");

        if (effectivePredicate.isNone()) {
            return new HivePartitionResult(ImmutableList.of(), TupleDomain.none(), TupleDomain.none());
        }

        SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
        Table table = getTable(metastore, tableName);
        Optional<HiveBucketing.HiveBucket> bucket = getHiveBucket(table,
                TupleDomain.extractFixedValues(effectivePredicate).get());

        TupleDomain<HiveColumnHandle> compactEffectivePredicate = toCompactTupleDomain(effectivePredicate,
                domainCompactionThreshold);

        if (table.getPartitionKeys().isEmpty()) {
            return new HivePartitionResult(
                    ImmutableList.of(new HivePartition(tableName, compactEffectivePredicate, bucket)),
                    effectivePredicate, TupleDomain.none());
        }

        List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(connectorId, table);
        List<String> partitionNames = getFilteredPartitionNames(metastore, tableName, partitionColumns,
                effectivePredicate);

        // do a final pass to filter based on fields that could not be used to filter the partitions
        ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
        for (String partitionName : partitionNames) {
            Optional<Map<ColumnHandle, NullableValue>> values = parseValuesAndFilterPartition(partitionName,
                    partitionColumns, effectivePredicate);

            if (values.isPresent()) {
                partitions.add(new HivePartition(tableName, compactEffectivePredicate, partitionName, values.get(),
                        bucket));
            }
        }

        // All partition key domains will be fully evaluated, so we don't need to include those
        TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.withColumnDomains(
                Maps.filterKeys(effectivePredicate.getDomains().get(), not(Predicates.in(partitionColumns))));
        TupleDomain<ColumnHandle> enforcedTupleDomain = TupleDomain.withColumnDomains(
                Maps.filterKeys(effectivePredicate.getDomains().get(), Predicates.in(partitionColumns)));
        return new HivePartitionResult(partitions.build(), remainingTupleDomain, enforcedTupleDomain);
    }

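    // Caps the size of each column's domain: a range set with more ranges than the
    // threshold collapses to its span, and an oversized discrete set widens to all
    // values of the type. The result is a weaker but smaller predicate.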
    private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate,
            int threshold) {
        checkArgument(effectivePredicate.getDomains().isPresent());

        ImmutableMap.Builder<HiveColumnHandle, Domain> builder = ImmutableMap.builder();
        for (Map.Entry<ColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
            HiveColumnHandle hiveColumnHandle = checkType(entry.getKey(), HiveColumnHandle.class,
                    "ConnectorColumnHandle");

            ValueSet values = entry.getValue().getValues();
            ValueSet compactValueSet = values.getValuesProcessor().<Optional<ValueSet>>transform(
                    ranges -> ranges.getRangeCount() > threshold ? Optional.of(ValueSet.ofRanges(ranges.getSpan()))
                            : Optional.empty(),
                    discreteValues -> discreteValues.getValues().size() > threshold
                            ? Optional.of(ValueSet.all(values.getType()))
                            : Optional.empty(),
                    allOrNone -> Optional.empty()).orElse(values);
            builder.put(hiveColumnHandle, Domain.create(compactValueSet, entry.getValue().isNullAllowed()));
        }
        return TupleDomain.withColumnDomains(builder.build());
    }

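    // Parses the partition name into one typed value per partition column and
    // returns empty if any value falls outside the predicate's allowed domain.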
    private Optional<Map<ColumnHandle, NullableValue>> parseValuesAndFilterPartition(String partitionName,
            List<HiveColumnHandle> partitionColumns, TupleDomain<ColumnHandle> predicate) {
        checkArgument(predicate.getDomains().isPresent());

        List<String> partitionValues = extractPartitionKeyValues(partitionName);

        Map<ColumnHandle, Domain> domains = predicate.getDomains().get();
        ImmutableMap.Builder<ColumnHandle, NullableValue> builder = ImmutableMap.builder();
        for (int i = 0; i < partitionColumns.size(); i++) {
            HiveColumnHandle column = partitionColumns.get(i);
            NullableValue parsedValue = parsePartitionValue(partitionName, partitionValues.get(i),
                    column.getHiveType(), timeZone);

            Domain allowedDomain = domains.get(column);
            if (allowedDomain != null && !allowedDomain.includesNullableValue(parsedValue.getValue())) {
                return Optional.empty();
            }
            builder.put(column, parsedValue);
        }

        return Optional.of(builder.build());
    }

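    // Loads the table from the metastore, failing if it does not exist or is
    // marked offline (via Hive's protect mode or the presto_offline parameter).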
    private Table getTable(HiveMetastore metastore, SchemaTableName tableName) {
        Optional<Table> target = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
        if (!target.isPresent()) {
            throw new TableNotFoundException(tableName);
        }
        Table table = target.get();

        String protectMode = table.getParameters().get(ProtectMode.PARAMETER_NAME);
        if (protectMode != null && getProtectModeFromString(protectMode).offline) {
            throw new TableOfflineException(tableName);
        }

        String prestoOffline = table.getParameters().get(PRESTO_OFFLINE);
        if (!isNullOrEmpty(prestoOffline)) {
            throw new TableOfflineException(tableName,
                    format("Table '%s' is offline for Presto: %s", tableName, prestoOffline));
        }

        return table;
    }

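    // Builds a metastore partition filter: an exact value where the predicate pins
    // a partition key to a single value, otherwise a wildcard. Non-canonical
    // numeric and boolean values fall back to the wildcard because the metastore
    // matches partition names textually.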
    private List<String> getFilteredPartitionNames(HiveMetastore metastore, SchemaTableName tableName,
            List<HiveColumnHandle> partitionKeys, TupleDomain<ColumnHandle> effectivePredicate) {
        checkArgument(effectivePredicate.getDomains().isPresent());

        List<String> filter = new ArrayList<>();
        for (HiveColumnHandle partitionKey : partitionKeys) {
            Domain domain = effectivePredicate.getDomains().get().get(partitionKey);
            if (domain != null && domain.isNullableSingleValue()) {
                Object value = domain.getNullableSingleValue();
                if (value == null) {
                    filter.add(HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION);
                } else if (value instanceof Slice) {
                    filter.add(((Slice) value).toStringUtf8());
                } else if ((value instanceof Boolean) || (value instanceof Double) || (value instanceof Long)) {
                    if (assumeCanonicalPartitionKeys) {
                        filter.add(value.toString());
                    } else {
                        // Hive treats '0', 'false', and 'False' the same. However, the metastore differentiates between these.
                        filter.add(PARTITION_VALUE_WILDCARD);
                    }
                } else {
                    throw new PrestoException(NOT_SUPPORTED,
                            "Only Boolean, Double and Long partition keys are supported");
                }
            } else {
                filter.add(PARTITION_VALUE_WILDCARD);
            }
        }

        // fetch the partition names
        return metastore.getPartitionNamesByParts(tableName.getSchemaName(), tableName.getTableName(), filter)
                .orElseThrow(() -> new TableNotFoundException(tableName));
    }

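    // Splits a partition name such as "ds=2016-01-01/country=US" into its
    // unescaped values, here ["2016-01-01", "US"].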
    public static List<String> extractPartitionKeyValues(String partitionName) {
        ImmutableList.Builder<String> values = ImmutableList.builder();

        boolean inKey = true;
        int valueStart = -1;
        for (int i = 0; i < partitionName.length(); i++) {
            char current = partitionName.charAt(i);
            if (inKey) {
                checkArgument(current != '/', "Invalid partition spec: %s", partitionName);
                if (current == '=') {
                    inKey = false;
                    valueStart = i + 1;
                }
            } else if (current == '/') {
                checkArgument(valueStart != -1, "Invalid partition spec: %s", partitionName);
                values.add(FileUtils.unescapePathName(partitionName.substring(valueStart, i)));
                inKey = true;
                valueStart = -1;
            }
        }
        checkArgument(!inKey, "Invalid partition spec: %s", partitionName);
        values.add(FileUtils.unescapePathName(partitionName.substring(valueStart, partitionName.length())));

        return values.build();
    }
}
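
Example

extractPartitionKeyValues is the one piece of this class that is easy to try in isolation. The snippet below is a minimal sketch, assuming HivePartitionManager and its Hive dependencies are on the classpath; the class name ExtractPartitionValuesExample and the sample partition names are illustrative, not part of the Presto source.

import com.facebook.presto.hive.HivePartitionManager;

import java.util.List;

public class ExtractPartitionValuesExample {
    public static void main(String[] args) {
        // A two-key partition name parses to one value per key: [2016-01-01, US]
        List<String> values = HivePartitionManager.extractPartitionKeyValues("ds=2016-01-01/country=US");
        System.out.println(values);

        // Hive percent-escapes special characters in partition values;
        // FileUtils.unescapePathName restores them, so this prints [a/b]
        System.out.println(HivePartitionManager.extractPartitionKeyValues("key=a%2Fb"));
    }
}

Note that keys are discarded: the method returns only the values, in the order the keys appear in the partition name, which is why getPartitions pairs the result positionally with the table's partition columns.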