Java tutorial
// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package com.cloudera.impala.catalog; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; import javax.xml.bind.DatatypeConverter; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.log4j.Logger; import org.kududb.client.KuduClient; import org.kududb.client.LocatedTablet; import com.cloudera.impala.common.ImpalaRuntimeException; import com.cloudera.impala.thrift.TCatalogObjectType; import com.cloudera.impala.thrift.TColumn; import com.cloudera.impala.thrift.TKuduTable; import com.cloudera.impala.thrift.TResultSet; import com.cloudera.impala.thrift.TResultSetMetadata; import com.cloudera.impala.thrift.TTable; import com.cloudera.impala.thrift.TTableDescriptor; import com.cloudera.impala.thrift.TTableType; import com.cloudera.impala.util.KuduUtil; import com.cloudera.impala.util.TResultRowBuilder; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; /** * Impala representation of a Kudu table. * * The Kudu-related metadata is stored in the Metastore table's table properties. */ public class KuduTable extends Table { private static final Logger LOG = Logger.getLogger(Table.class); // Alias to the string key that identifies the storage handler for Kudu tables. public static final String KEY_STORAGE_HANDLER = hive_metastoreConstants.META_TABLE_STORAGE; // Key to access the table name from the table properties public static final String KEY_TABLE_NAME = "kudu.table_name"; // Key to access the columns used to build the (composite) key of the table. // The order of the keys is important. public static final String KEY_KEY_COLUMNS = "kudu.key_columns"; // Key to access the master address from the table properties. Error handling for // this string is done in the KuduClient library. // TODO we should have something like KuduConfig.getDefaultConfig() public static final String KEY_MASTER_ADDRESSES = "kudu.master_addresses"; // Kudu specific value for the storage handler table property keyed by // KEY_STORAGE_HANDLER. public static final String KUDU_STORAGE_HANDLER = "com.cloudera.kudu.hive.KuduStorageHandler"; // Key to specify the number of tablet replicas. // TODO(KUDU): Allow modification in alter table. public static final String KEY_TABLET_REPLICAS = "kudu.num_tablet_replicas"; public static final long KUDU_RPC_TIMEOUT_MS = 50000; // The name of the table in Kudu. private String kuduTableName_; // Comma separated list of Kudu master hosts with optional ports. private String kuduMasters_; // The set of columns that are key columns in Kudu. private ImmutableList<String> kuduKeyColumnNames_; protected KuduTable(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, Db db, String name, String owner) { super(id, msTable, db, name, owner); } public TKuduTable getKuduTable() { TKuduTable tbl = new TKuduTable(); tbl.setKey_columns(Preconditions.checkNotNull(kuduKeyColumnNames_)); tbl.setMaster_addresses(Lists.newArrayList(kuduMasters_.split(","))); tbl.setTable_name(kuduTableName_); return tbl; } @Override public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) { TTableDescriptor desc = new TTableDescriptor(id_.asInt(), TTableType.KUDU_TABLE, getTColumnDescriptors(), numClusteringCols_, kuduTableName_, db_.getName()); desc.setKuduTable(getKuduTable()); return desc; } @Override public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.TABLE; } @Override public String getStorageHandlerClassName() { return KUDU_STORAGE_HANDLER; } /** * Returns the columns in the order they have been created */ @Override public ArrayList<Column> getColumnsInHiveOrder() { return getColumns(); } public static boolean isKuduTable(org.apache.hadoop.hive.metastore.api.Table mstbl) { return KUDU_STORAGE_HANDLER.equals(mstbl.getParameters().get(KEY_STORAGE_HANDLER)); } /** * Load the columns from the schema list */ private void loadColumns(List<FieldSchema> schema, HiveMetaStoreClient client, Set<String> keyColumns) throws TableLoadingException { if (keyColumns.size() == 0 || keyColumns.size() > schema.size()) { throw new TableLoadingException(String.format("Kudu tables must have at least one" + "key column (had %d), and no more key columns than there are table columns " + "(had %d).", keyColumns.size(), schema.size())); } clearColumns(); Set<String> columnNames = Sets.newHashSet(); int pos = 0; for (FieldSchema field : schema) { com.cloudera.impala.catalog.Type type = parseColumnType(field); // TODO(kudu-merge): Check for decimal types? boolean isKey = keyColumns.contains(field.getName()); KuduColumn col = new KuduColumn(field.getName(), isKey, !isKey, type, field.getComment(), pos); columnNames.add(col.getName()); addColumn(col); ++pos; } if (!columnNames.containsAll(keyColumns)) { throw new TableLoadingException(String.format( "Some key columns were not found in" + " the set of columns. List of column names: %s, List of key column names:" + " %s", Iterables.toString(columnNames), Iterables.toString(keyColumns))); } kuduKeyColumnNames_ = ImmutableList.copyOf(keyColumns); loadAllColumnStats(client); } @Override public void load(boolean reuseMetadata, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException { // TODO handle 'reuseMetadata' if (getMetaStoreTable() == null || !tableParamsAreValid(msTbl.getParameters())) { throw new TableLoadingException(String.format("Cannot load Kudu table %s, table is corrupt.", name_)); } msTable_ = msTbl; kuduTableName_ = msTbl.getParameters().get(KEY_TABLE_NAME); kuduMasters_ = msTbl.getParameters().get(KEY_MASTER_ADDRESSES); String keyColumnsProp = Preconditions.checkNotNull(msTbl.getParameters().get(KEY_KEY_COLUMNS).toLowerCase(), "'kudu.key_columns' cannot be null."); Set<String> keyColumns = KuduUtil.parseKeyColumns(keyColumnsProp); // Load the rest of the data from the table parameters directly loadColumns(msTbl.getSd().getCols(), client, keyColumns); numClusteringCols_ = 0; // Get row count from stats numRows_ = getRowCount(getMetaStoreTable().getParameters()); } @Override public TTable toThrift() { TTable table = super.toThrift(); table.setTable_type(TTableType.KUDU_TABLE); table.setKudu_table(getKuduTable()); return table; } @Override protected void loadFromThrift(TTable thriftTable) throws TableLoadingException { super.loadFromThrift(thriftTable); TKuduTable tkudu = thriftTable.getKudu_table(); kuduTableName_ = tkudu.getTable_name(); kuduMasters_ = Joiner.on(',').join(tkudu.getMaster_addresses()); kuduKeyColumnNames_ = ImmutableList.copyOf(tkudu.getKey_columns()); } public String getKuduTableName() { return kuduTableName_; } public String getKuduMasterAddresses() { return kuduMasters_; } public int getNumKeyColumns() { return kuduKeyColumnNames_.size(); } /** * Returns true if all required parameters are present in the given table properties * map. * TODO(kudu-merge) Return a more specific error string. */ public static boolean tableParamsAreValid(Map<String, String> params) { return params.get(KEY_TABLE_NAME) != null && params.get(KEY_TABLE_NAME).length() > 0 && params.get(KEY_MASTER_ADDRESSES) != null && params.get(KEY_MASTER_ADDRESSES).length() > 0 && params.get(KEY_KEY_COLUMNS) != null && params.get(KEY_KEY_COLUMNS).length() > 0; } /** * The number of nodes is not know ahead of time and will be updated during computeStats * in the scan node. */ public int getNumNodes() { return -1; } public List<String> getKuduKeyColumnNames() { return kuduKeyColumnNames_; } public TResultSet getTableStats() throws ImpalaRuntimeException { TResultSet result = new TResultSet(); TResultSetMetadata resultSchema = new TResultSetMetadata(); result.setSchema(resultSchema); resultSchema.addToColumns(new TColumn("# Rows", Type.INT.toThrift())); resultSchema.addToColumns(new TColumn("Start Key", Type.STRING.toThrift())); resultSchema.addToColumns(new TColumn("Stop Key", Type.STRING.toThrift())); resultSchema.addToColumns(new TColumn("Leader Replica", Type.STRING.toThrift())); resultSchema.addToColumns(new TColumn("# Replicas", Type.INT.toThrift())); try (KuduClient client = new KuduClient.KuduClientBuilder(getKuduMasterAddresses()).build()) { org.kududb.client.KuduTable kuduTable = client.openTable(kuduTableName_); List<LocatedTablet> tablets = kuduTable.getTabletsLocations(KUDU_RPC_TIMEOUT_MS); for (LocatedTablet tab : tablets) { TResultRowBuilder builder = new TResultRowBuilder(); builder.add("-1"); // The Kudu client API doesn't expose tablet row counts. builder.add(DatatypeConverter.printHexBinary(tab.getPartition().getPartitionKeyStart())); builder.add(DatatypeConverter.printHexBinary(tab.getPartition().getPartitionKeyEnd())); LocatedTablet.Replica leader = tab.getLeaderReplica(); if (leader == null) { // Leader might be null, if it is not yet available (e.g. during // leader election in Kudu) builder.add("Leader n/a"); } else { builder.add(leader.getRpcHost() + ":" + leader.getRpcPort().toString()); } builder.add(tab.getReplicas().size()); result.addToRows(builder.get()); } } catch (Exception e) { throw new ImpalaRuntimeException("Could not communicate with Kudu.", e); } return result; } }