Java tutorial: org.apache.atlas.storm.hook.StormAtlasHook (Apache Atlas Storm bridge)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.atlas.storm.hook;

import org.apache.storm.ISubmitterHook;
import org.apache.storm.generated.Bolt;
import org.apache.storm.generated.SpoutSpec;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.generated.TopologyInfo;
import org.apache.storm.utils.Utils;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasConstants;
import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import org.apache.atlas.hook.AtlasHook;
import org.apache.atlas.storm.model.StormDataTypes;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.slf4j.Logger;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * StormAtlasHook sends storm topology metadata information to Atlas
 * via a Kafka Broker for durability.
 * <p/>
 * This is based on the assumption that the same topology name is used
 * for the various lifecycle stages.
 */
public class StormAtlasHook extends AtlasHook implements ISubmitterHook {

    public static final Logger LOG = org.slf4j.LoggerFactory.getLogger(StormAtlasHook.class);

    private static final String CONF_PREFIX = "atlas.hook.storm.";
    private static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries";

    // will be used for owner if Storm topology does not contain the owner instance
    // possible if Storm is running in unsecure mode.
    public static final String ANONYMOUS_OWNER = "anonymous";

    public static final String HBASE_NAMESPACE_DEFAULT = "default";

    @Override
    protected String getNumberOfRetriesPropertyKey() {
        return HOOK_NUM_RETRIES;
    }

    /**
     * This is the client-side hook that storm fires when a topology is added.
     *
     * @param topologyInfo  topology info
     * @param stormConf     configuration
     * @param stormTopology a storm topology
     * @throws IllegalAccessException
     */
    @Override
    public void notify(TopologyInfo topologyInfo, Map stormConf,
                       StormTopology stormTopology) throws IllegalAccessException {
        LOG.info("Collecting metadata for a new storm topology: {}", topologyInfo.get_name());
        try {
            ArrayList<Referenceable> entities = new ArrayList<>();
            Referenceable topologyReferenceable = createTopologyInstance(topologyInfo, stormConf);
            List<Referenceable> dependentEntities = addTopologyDataSets(stormTopology, topologyReferenceable,
                    topologyInfo.get_owner(), stormConf);
            if (dependentEntities.size() > 0) {
                entities.addAll(dependentEntities);
            }

            // create the graph for the topology
            ArrayList<Referenceable> graphNodes = createTopologyGraph(
                    stormTopology, stormTopology.get_spouts(), stormTopology.get_bolts());
            // add the connection from topology to the graph
            topologyReferenceable.set("nodes", graphNodes);
            entities.add(topologyReferenceable);

            LOG.debug("notifying entities, size = {}", entities.size());
            String user = getUser(topologyInfo.get_owner(), null);
            notifyEntities(user, entities);
        } catch (Exception e) {
            throw new RuntimeException("Atlas hook is unable to process the topology.", e);
        }
    }

    private Referenceable createTopologyInstance(TopologyInfo topologyInfo, Map stormConf) throws Exception {
        Referenceable topologyReferenceable = new Referenceable(StormDataTypes.STORM_TOPOLOGY.getName());
        topologyReferenceable.set("id", topologyInfo.get_id());
        topologyReferenceable.set(AtlasClient.NAME, topologyInfo.get_name());
        topologyReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, topologyInfo.get_name());
        String owner = topologyInfo.get_owner();
        if (StringUtils.isEmpty(owner)) {
            owner = ANONYMOUS_OWNER;
        }
        topologyReferenceable.set(AtlasClient.OWNER, owner);
        topologyReferenceable.set("startTime", new Date(System.currentTimeMillis()));
        topologyReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getClusterName(stormConf));

        return topologyReferenceable;
    }

    private List<Referenceable> addTopologyDataSets(StormTopology stormTopology,
                                                    Referenceable topologyReferenceable,
                                                    String topologyOwner,
                                                    Map stormConf) throws Exception {
        List<Referenceable> dependentEntities = new ArrayList<>();
        // add each spout as an input data set
        addTopologyInputs(topologyReferenceable, stormTopology.get_spouts(), stormConf, topologyOwner, dependentEntities);
        // add the appropriate bolts as output data sets
        addTopologyOutputs(topologyReferenceable, stormTopology, topologyOwner, stormConf, dependentEntities);
        return dependentEntities;
    }

    private void addTopologyInputs(Referenceable topologyReferenceable,
                                   Map<String, SpoutSpec> spouts,
                                   Map stormConf,
                                   String topologyOwner,
                                   List<Referenceable> dependentEntities) throws IllegalAccessException {
        final ArrayList<Referenceable> inputDataSets = new ArrayList<>();
        for (Map.Entry<String, SpoutSpec> entry : spouts.entrySet()) {
            Serializable instance = Utils.javaDeserialize(
                    entry.getValue().get_spout_object().get_serialized_java(), Serializable.class);

            String simpleName = instance.getClass().getSimpleName();
            final Referenceable datasetRef = createDataSet(simpleName, topologyOwner, instance, stormConf, dependentEntities);
            if (datasetRef != null) {
                inputDataSets.add(datasetRef);
            }
        }

        topologyReferenceable.set("inputs", inputDataSets);
    }

    private void addTopologyOutputs(Referenceable topologyReferenceable,
                                    StormTopology stormTopology,
                                    String topologyOwner,
                                    Map stormConf,
                                    List<Referenceable> dependentEntities) throws Exception {
        final ArrayList<Referenceable> outputDataSets = new ArrayList<>();

        Map<String, Bolt> bolts = stormTopology.get_bolts();
        Set<String> terminalBoltNames = StormTopologyUtil.getTerminalUserBoltNames(stormTopology);
        for (String terminalBoltName : terminalBoltNames) {
            Serializable instance = Utils.javaDeserialize(
                    bolts.get(terminalBoltName).get_bolt_object().get_serialized_java(), Serializable.class);

            String dataSetType = instance.getClass().getSimpleName();
            final Referenceable datasetRef = createDataSet(dataSetType, topologyOwner, instance, stormConf, dependentEntities);
            if (datasetRef != null) {
                outputDataSets.add(datasetRef);
            }
        }

        topologyReferenceable.set("outputs", outputDataSets);
    }

    private Referenceable createDataSet(String name, String topologyOwner, Serializable instance,
                                        Map stormConf, List<Referenceable> dependentEntities) throws IllegalAccessException {
        Map<String, String> config = StormTopologyUtil.getFieldValues(instance, true, null);

        String clusterName = null;
        Referenceable dataSetReferenceable;
        // todo: need to redo this with a config driven approach
        switch (name) {
            case "KafkaSpout":
                dataSetReferenceable = new Referenceable(StormDataTypes.KAFKA_TOPIC.getName());
                final String topicName = config.get("KafkaSpout._spoutConfig.topic");
                dataSetReferenceable.set("topic", topicName);
                dataSetReferenceable.set("uri", config.get("KafkaSpout._spoutConfig.hosts.brokerZkStr"));
                if (StringUtils.isEmpty(topologyOwner)) {
                    topologyOwner = ANONYMOUS_OWNER;
                }
                dataSetReferenceable.set(AtlasClient.OWNER, topologyOwner);
                dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                        getKafkaTopicQualifiedName(getClusterName(stormConf), topicName));
                dataSetReferenceable.set(AtlasClient.NAME, topicName);
                break;

            case "HBaseBolt":
                dataSetReferenceable = new Referenceable(StormDataTypes.HBASE_TABLE.getName());
                final String hbaseTableName = config.get("HBaseBolt.tableName");
                dataSetReferenceable.set("uri", stormConf.get("hbase.rootdir"));
                dataSetReferenceable.set(AtlasClient.NAME, hbaseTableName);
                dataSetReferenceable.set(AtlasClient.OWNER, stormConf.get("storm.kerberos.principal"));
                clusterName = extractComponentClusterName(HBaseConfiguration.create(), stormConf);
                //TODO - Hbase Namespace is hardcoded to 'default'. need to check how to get this or is it already part of tableName
                dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                        getHbaseTableQualifiedName(clusterName, HBASE_NAMESPACE_DEFAULT, hbaseTableName));
                break;

            case "HdfsBolt":
                dataSetReferenceable = new Referenceable(HiveMetaStoreBridge.HDFS_PATH);
                String hdfsUri = config.get("HdfsBolt.rotationActions") == null
                        ? config.get("HdfsBolt.fileNameFormat.path")
                        : config.get("HdfsBolt.rotationActions");
                final String hdfsPathStr = config.get("HdfsBolt.fsUrl") + hdfsUri;
                dataSetReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getClusterName(stormConf));
                dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, hdfsPathStr);
                dataSetReferenceable.set("path", hdfsPathStr);
                dataSetReferenceable.set(AtlasClient.OWNER, stormConf.get("hdfs.kerberos.principal"));
                final Path hdfsPath = new Path(hdfsPathStr);
                dataSetReferenceable.set(AtlasClient.NAME,
                        Path.getPathWithoutSchemeAndAuthority(hdfsPath).toString().toLowerCase());
                break;

            case "HiveBolt":
                // todo: verify if hive table has everything needed to retrieve existing table
                Referenceable dbReferenceable = new Referenceable("hive_db");
                String databaseName = config.get("HiveBolt.options.databaseName");
                dbReferenceable.set(AtlasClient.NAME, databaseName);
                dbReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                        HiveMetaStoreBridge.getDBQualifiedName(getClusterName(stormConf), databaseName));
                dbReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getClusterName(stormConf));
                dependentEntities.add(dbReferenceable);
                clusterName = extractComponentClusterName(new HiveConf(), stormConf);
                final String hiveTableName = config.get("HiveBolt.options.tableName");
                dataSetReferenceable = new Referenceable("hive_table");
                final String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(
                        clusterName, databaseName, hiveTableName);
                dataSetReferenceable.set(AtlasClient.NAME, hiveTableName);
                dataSetReferenceable.set(HiveMetaStoreBridge.DB, dbReferenceable);
                dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
                break;

            default:
                // custom node - create a base dataset class with name attribute
                //TODO - What should we do for custom data sets. Not sure what name we can set here?
                return null;
        }

        dependentEntities.add(dataSetReferenceable);
        return dataSetReferenceable;
    }

    private String extractComponentClusterName(Configuration configuration, Map stormConf) {
        String clusterName = configuration.get(AtlasConstants.CLUSTER_NAME_KEY, null);
        if (clusterName == null) {
            clusterName = getClusterName(stormConf);
        }
        return clusterName;
    }

    private ArrayList<Referenceable> createTopologyGraph(StormTopology stormTopology,
                                                         Map<String, SpoutSpec> spouts,
                                                         Map<String, Bolt> bolts) throws Exception {
        // Add graph of nodes in the topology
        final Map<String, Referenceable> nodeEntities = new HashMap<>();
        addSpouts(spouts, nodeEntities);
        addBolts(bolts, nodeEntities);

        addGraphConnections(stormTopology, nodeEntities);

        ArrayList<Referenceable> nodes = new ArrayList<>();
        nodes.addAll(nodeEntities.values());
        return nodes;
    }

    private void addSpouts(Map<String, SpoutSpec> spouts,
                           Map<String, Referenceable> nodeEntities) throws IllegalAccessException {
        for (Map.Entry<String, SpoutSpec> entry : spouts.entrySet()) {
            final String spoutName = entry.getKey();
            Referenceable spoutReferenceable = createSpoutInstance(spoutName, entry.getValue());
            nodeEntities.put(spoutName, spoutReferenceable);
        }
    }

    private Referenceable createSpoutInstance(String spoutName, SpoutSpec stormSpout) throws IllegalAccessException {
        Referenceable spoutReferenceable = new Referenceable(StormDataTypes.STORM_SPOUT.getName());
        spoutReferenceable.set(AtlasClient.NAME, spoutName);

        Serializable instance = Utils.javaDeserialize(
                stormSpout.get_spout_object().get_serialized_java(), Serializable.class);
        spoutReferenceable.set("driverClass", instance.getClass().getName());

        Map<String, String> flatConfigMap = StormTopologyUtil.getFieldValues(instance, true, null);
        spoutReferenceable.set("conf", flatConfigMap);

        return spoutReferenceable;
    }

    private void addBolts(Map<String, Bolt> bolts,
                          Map<String, Referenceable> nodeEntities) throws IllegalAccessException {
        for (Map.Entry<String, Bolt> entry : bolts.entrySet()) {
            Referenceable boltInstance = createBoltInstance(entry.getKey(), entry.getValue());
            nodeEntities.put(entry.getKey(), boltInstance);
        }
    }

    private Referenceable createBoltInstance(String boltName, Bolt stormBolt) throws IllegalAccessException {
        Referenceable boltReferenceable = new Referenceable(StormDataTypes.STORM_BOLT.getName());
        boltReferenceable.set(AtlasClient.NAME, boltName);

        Serializable instance = Utils.javaDeserialize(
                stormBolt.get_bolt_object().get_serialized_java(), Serializable.class);
        boltReferenceable.set("driverClass", instance.getClass().getName());

        Map<String, String> flatConfigMap = StormTopologyUtil.getFieldValues(instance, true, null);
        boltReferenceable.set("conf", flatConfigMap);

        return boltReferenceable;
    }

    private void addGraphConnections(StormTopology stormTopology,
                                     Map<String, Referenceable> nodeEntities) throws Exception {
        // adds connections between spouts and bolts
        Map<String, Set<String>> adjacencyMap = StormTopologyUtil.getAdjacencyMap(stormTopology, true);

        for (Map.Entry<String, Set<String>> entry : adjacencyMap.entrySet()) {
            String nodeName = entry.getKey();
            Set<String> adjacencyList = adjacencyMap.get(nodeName);
            if (adjacencyList == null || adjacencyList.isEmpty()) {
                continue;
            }

            // add outgoing links
            Referenceable node = nodeEntities.get(nodeName);
            ArrayList<String> outputs = new ArrayList<>(adjacencyList.size());
            outputs.addAll(adjacencyList);
            node.set("outputs", outputs);

            // add incoming links
            for (String adjacentNodeName : adjacencyList) {
                Referenceable adjacentNode = nodeEntities.get(adjacentNodeName);
                @SuppressWarnings("unchecked")
                ArrayList<String> inputs = (ArrayList<String>) adjacentNode.get("inputs");
                if (inputs == null) {
                    inputs = new ArrayList<>();
                }
                inputs.add(nodeName);
                adjacentNode.set("inputs", inputs);
            }
        }
    }

    public static String getKafkaTopicQualifiedName(String clusterName, String topicName) {
        return String.format("%s@%s", topicName, clusterName);
    }

    public static String getHbaseTableQualifiedName(String clusterName, String nameSpace, String tableName) {
        return String.format("%s.%s@%s", nameSpace, tableName, clusterName);
    }

    private String getClusterName(Map stormConf) {
        return atlasProperties.getString(AtlasConstants.CLUSTER_NAME_KEY, AtlasConstants.DEFAULT_CLUSTER_NAME);
    }
}
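
Because StormAtlasHook implements Storm's ISubmitterHook interface, it only runs if the submitting client is configured to load it. The sketch below is a minimal, hedged example of wiring the hook in at submission time; it assumes the storm.topology.submission.notifier.plugin.class setting described in the Atlas Storm bridge documentation (normally placed cluster-wide in storm.yaml), and the topology name and class used here are placeholders, not part of the Atlas code above.

import java.util.HashMap;
import java.util.Map;

import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;

public class SubmitWithAtlasHook {
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        // ... register spouts and bolts on the builder here ...

        Map<String, Object> conf = new HashMap<>();
        // Assumption: this Storm setting makes StormSubmitter invoke registered
        // ISubmitterHook implementations after a successful submission; it is
        // usually set once in storm.yaml rather than per topology.
        conf.put("storm.topology.submission.notifier.plugin.class",
                 "org.apache.atlas.storm.hook.StormAtlasHook");

        // "my-topology" is a placeholder; the hook's notify() receives the
        // submitted topology's TopologyInfo, conf, and StormTopology.
        StormSubmitter.submitTopology("my-topology", conf, builder.createTopology());
    }
}

As the class javadoc notes, the collected Referenceable entities are published through Atlas' Kafka-backed notification framework (via the AtlasHook base class), so the Atlas client configuration and hook jars must also be available on the submitting client's classpath.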