com.dropbox.presto.kafka.KafkaSplitManager.java Source code

Java tutorial

Introduction

Here is the source code for com.dropbox.presto.kafka.KafkaSplitManager.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.dropbox.presto.kafka;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import static com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Table;

import com.facebook.presto.hive.CachingHiveMetastore;
import com.dropbox.presto.kafka.KafkaHiveClient;
import com.facebook.presto.hive.HivePartition;
import com.facebook.presto.spi.ConnectorSplitManager;
import com.facebook.presto.spi.FixedSplitSource;
import com.facebook.presto.spi.Partition;
import com.facebook.presto.spi.PartitionResult;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.Split;
import com.facebook.presto.spi.SplitSource;
import com.facebook.presto.spi.TableHandle;
import com.facebook.presto.spi.TupleDomain;

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.inject.Inject;

import com.dropbox.presto.kafka.ZKStringSerializer;

/**
 * {@link ConnectorSplitManager} for Kafka-mapped Hive tables.
 *
 * <p>Table metadata is looked up through the metastore exposed by the injected
 * {@link KafkaHiveClient}. Partition pruning is delegated to that client, and the
 * split source itself is produced by {@code KafkaSplitSourceProvider}.
 */
public class KafkaSplitManager implements ConnectorSplitManager {
    private final KafkaHiveClient hiveClient;
    private final String connectorId;
    private final KafkaClientConfig kafkaConfig;

    /**
     * Creates the split manager.
     *
     * @param hiveClient  client used for metastore access and partition pruning; must not be null
     * @param connectorId connector identifier; its {@code toString()} value is retained; must not be null
     * @param kafkaConfig Kafka client configuration handed to the split source provider; must not be null
     */
    @Inject
    public KafkaSplitManager(KafkaHiveClient hiveClient, KafkaConnectorId connectorId,
            KafkaClientConfig kafkaConfig) {
        this.hiveClient = checkNotNull(hiveClient, "hiveClient is null");
        this.connectorId = checkNotNull(connectorId, "connectorId is null").toString();
        this.kafkaConfig = checkNotNull(kafkaConfig, "kafkaConfig is null");
    }

    /**
     * @return the string form of the connector id this manager was created with
     */
    @Override
    public String getConnectorId() {
        return this.connectorId;
    }

    /**
     * @param tableHandle handle to test
     * @return {@code true} only if the handle is a {@code KafkaTableHandle} that reports itself
     *         as a Kafka table for this connector id
     */
    @Override
    public boolean canHandle(TableHandle tableHandle) {
        return tableHandle instanceof KafkaTableHandle
                && ((KafkaTableHandle) tableHandle).isKafkaTable(connectorId);
    }

    /**
     * Extracts the schema/table name from a handle.
     *
     * @throws IllegalArgumentException if the handle is not a {@code KafkaTableHandle}
     */
    private static SchemaTableName getTableName(TableHandle tableHandle) {
        checkArgument(tableHandle instanceof KafkaTableHandle,
                "tableHandle is not an instance of KafkaTableHandle");
        return ((KafkaTableHandle) tableHandle).getSchemaTableName();
    }

    /** Shorthand for the metastore owned by the Hive client. */
    private CachingHiveMetastore metastore() {
        return this.hiveClient.getMetastore();
    }

    /**
     * Verifies that the table exists and carries the Kafka table marker in its parameters,
     * then delegates partition resolution to the Hive client.
     *
     * @param tableHandle handle of the table to resolve; must not be null
     * @param tupleDomain constraint forwarded unchanged to the Hive client
     * @return the partition result computed by the Hive client
     * @throws RuntimeException if the table does not exist or is not marked as a Kafka table
     */
    @Override
    public PartitionResult getPartitions(TableHandle tableHandle, TupleDomain tupleDomain) {
        checkNotNull(tableHandle, "tableHandle is null");
        SchemaTableName tableName = getTableName(tableHandle);
        try {
            Table table = metastore().getTable(tableName.getSchemaName(), tableName.getTableName());
            Map<String, String> props = table.getParameters();
            if (!KafkaTableHandle.kafkaTableMarkPresent(props)) {
                throw new RuntimeException(tableName + " is not a Kafka mapped table.");
            }
        } catch (NoSuchObjectException e) {
            // Keep the metastore exception as the cause so the original failure stays diagnosable.
            throw new RuntimeException(tableName + " not found.", e);
        }

        return hiveClient.getPartitions(tableHandle, tupleDomain);
    }

    /**
     * Builds the split source for the given partitions of a table.
     *
     * <p>When the table declares partition keys, the partition ids are sorted in reverse natural
     * order and the matching Hive partitions are fetched from the metastore. Otherwise no Hive
     * partitions are fetched and {@code null} is passed to the split source provider.
     *
     * @param tableHandle handle of the table to split; must not be null
     * @param partitions  partitions to read; must not be null, and the first element (if any)
     *                    must be a {@link HivePartition}
     * @return the split source produced by {@code KafkaSplitSourceProvider}
     * @throws RuntimeException if the table is missing or the metastore lookup fails
     */
    @Override
    public SplitSource getPartitionSplits(TableHandle tableHandle, List<Partition> partitions) {
        checkNotNull(tableHandle, "tableHandle is null");
        checkNotNull(partitions, "partitions is null");

        Partition partition = Iterables.getFirst(partitions, null);
        if (partition != null) {
            checkArgument(partition instanceof HivePartition, "Partition must be a hive partition");
        }

        SchemaTableName tableName = getTableName(tableHandle);
        Table table;
        Iterable<org.apache.hadoop.hive.metastore.api.Partition> hivePartitions = null;
        try {
            table = metastore().getTable(tableName.getSchemaName(), tableName.getTableName());
            if (table.getPartitionKeys() != null && !table.getPartitionKeys().isEmpty()) {
                // Copy the lazy transformed view so it can be sorted in place.
                List<String> partitionNames = new ArrayList<String>(
                        Lists.transform(partitions, partitionIdGetter()));
                Collections.sort(partitionNames, Ordering.natural().reverse());
                hivePartitions = getPartitions(table, tableName, partitionNames);
            }
        } catch (NoSuchObjectException e) {
            throw new RuntimeException(tableName + " not found.", e);
        } catch (MetaException e) {
            // A MetaException is a metastore failure, not a missing table; report it as such.
            throw new RuntimeException("Failed to read metadata for " + tableName + ".", e);
        }

        return new KafkaSplitSourceProvider(connectorId, table, hivePartitions, kafkaConfig).get();
    }

    /**
     * @return a function mapping a {@link Partition} to its partition id
     */
    public static Function<Partition, String> partitionIdGetter() {
        return new Function<Partition, String>() {
            @Override
            public String apply(Partition input) {
                return input.getPartitionId();
            }
        };
    }

    /**
     * Fetches the named Hive partitions of a table from the metastore.
     *
     * @param table          the table the partitions belong to (not consulted by this method)
     * @param tableName      schema and table name used for the metastore lookup
     * @param partitionNames names of the partitions to fetch
     * @return the partitions returned by the metastore
     * @throws RuntimeException if the only partition name is the unpartitioned marker id
     */
    private Iterable<org.apache.hadoop.hive.metastore.api.Partition> getPartitions(final Table table,
            final SchemaTableName tableName, List<String> partitionNames)
            throws NoSuchObjectException, MetaException {
        if (partitionNames.equals(ImmutableList.of(UNPARTITIONED_ID))) {
            throw new RuntimeException("Got unpartitioned table.");
        }

        return metastore()
                .getPartitionsByNames(tableName.getSchemaName(), tableName.getTableName(), partitionNames);
    }
}