org.apache.eagle.alert.engine.spout.CorrelationSpout.java Source code

Introduction

Here is the source code for org.apache.eagle.alert.engine.spout.CorrelationSpout.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.eagle.alert.engine.spout;

import backtype.storm.spout.MultiScheme;
import backtype.storm.spout.Scheme;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import org.apache.commons.collections.CollectionUtils;
import org.apache.eagle.alert.coordination.model.Kafka2TupleMetadata;
import org.apache.eagle.alert.coordination.model.SpoutSpec;
import org.apache.eagle.alert.engine.coordinator.IMetadataChangeNotifyService;
import org.apache.eagle.alert.engine.coordinator.MetadataType;
import org.apache.eagle.alert.engine.coordinator.StreamDefinition;
import org.apache.eagle.alert.engine.router.SpoutSpecListener;
import org.apache.eagle.alert.engine.serialization.PartitionedEventSerializer;
import org.apache.eagle.alert.engine.serialization.SerializationMetadataProvider;
import org.apache.eagle.alert.engine.serialization.Serializers;
import org.apache.eagle.alert.utils.AlertConstants;
import org.apache.eagle.alert.utils.StreamIdConversion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.kafka.*;

import java.text.MessageFormat;
import java.util.*;

/**
 * Wraps KafkaSpout to provide parallel processing of messages for multiple Kafka topics.
 * <p>1. onSpoutSpecChange() is the interface through which the outside pushes new metadata. Upon new metadata, this class calculates whether
 * any topic was added, removed, or updated.</p>
 */
public class CorrelationSpout extends BaseRichSpout implements SpoutSpecListener, SerializationMetadataProvider {
    private static final long serialVersionUID = -5280723341236671580L;
    private static final Logger LOG = LoggerFactory.getLogger(CorrelationSpout.class);

    public static final String DEFAULT_STORM_KAFKA_TRANSACTION_ZK_ROOT = "/consumers";
    public static final String DEFAULT_STORM_KAFKA_TRANSACTION_ZK_RELATIVE_PATH = "/eagle_consumer";

    // topic to KafkaSpoutWrapper
    private volatile Map<String, KafkaSpoutWrapper> kafkaSpoutList = new HashMap<>();
    private int numOfRouterBolts;

    private SpoutSpec cachedSpoutSpec;

    private transient KafkaSpoutMetric kafkaSpoutMetric;

    @SuppressWarnings("rawtypes")
    private Map conf;
    private TopologyContext context;
    private SpoutOutputCollector collector;
    private final Config config;
    private String topologyId;
    private String spoutName;
    private String routeBoltName;
    @SuppressWarnings("unused")
    private int taskIndex;
    private IMetadataChangeNotifyService changeNotifyService;
    private PartitionedEventSerializer serializer;
    private volatile Map<String, StreamDefinition> sds;

    /**
     * FIXME: a single changeNotifyService may have issues, as multiple spout tasks will possibly register themselves and initialize the service.
     *
     * @param config              topology configuration
     * @param topologyId          used for distinguishing kafka offsets between different topologies
     * @param changeNotifyService service through which spout metadata changes are received
     * @param numOfRouterBolts    used for generating streamId and routing
     */
    public CorrelationSpout(Config config, String topologyId, IMetadataChangeNotifyService changeNotifyService,
            int numOfRouterBolts) {
        this(config, topologyId, changeNotifyService, numOfRouterBolts, AlertConstants.DEFAULT_SPOUT_NAME,
                AlertConstants.DEFAULT_ROUTERBOLT_NAME);
    }

    /**
     * @param config              topology configuration
     * @param topologyId          used for distinguishing kafka offsets between different topologies
     * @param changeNotifyService service through which spout metadata changes are received
     * @param numOfRouterBolts    used for generating streamId and routing
     * @param spoutName           used for generating streamId between spout and router bolt
     * @param routerBoltName      used for generating streamId between spout and router bolt
     */
    public CorrelationSpout(Config config, String topologyId, IMetadataChangeNotifyService changeNotifyService,
            int numOfRouterBolts, String spoutName, String routerBoltName) {
        this.config = config;
        this.topologyId = topologyId;
        this.changeNotifyService = changeNotifyService;
        this.numOfRouterBolts = numOfRouterBolts;
        this.spoutName = spoutName;
        this.routeBoltName = routerBoltName;
    }

    public String getSpoutName() {
        return spoutName;
    }

    public String getRouteBoltName() {
        return routeBoltName;
    }

    /**
     * The only output field is for StreamEvent.
     *
     * @param declarer output fields declarer
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
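        // Example: with numOfRouterBolts = 2 and routeBoltName "streamRouterBolt" (names illustrative),
        // one stream is declared towards "streamRouterBolt0" and another towards "streamRouterBolt1";
        // the exact streamId text is produced by StreamIdConversion.generateStreamIdBetween().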
        for (int i = 0; i < numOfRouterBolts; i++) {
            String streamId = StreamIdConversion.generateStreamIdBetween(spoutName, routeBoltName + i);
            declarer.declareStream(streamId, new Fields(AlertConstants.FIELD_0));
            LOG.info("declare stream between spout and streamRouterBolt " + streamId);
        }
    }

    @SuppressWarnings("rawtypes")
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("open method invoked");
        }
        this.conf = conf;
        this.context = context;
        this.collector = collector;
        this.taskIndex = context.getThisTaskIndex();

        // initialize an empty SpoutSpec
        cachedSpoutSpec = new SpoutSpec(topologyId, new HashMap<>(), new HashMap<>(), new HashMap<>());

        changeNotifyService.registerListener(this);
        changeNotifyService.init(config, MetadataType.SPOUT);

        // register KafkaSpout metric
        kafkaSpoutMetric = new KafkaSpoutMetric();
        context.registerMetric("kafkaSpout", kafkaSpoutMetric, 60);

        this.serializer = Serializers.newPartitionedEventSerializer(this);
    }

    @Override
    public void onSpoutSpecChange(SpoutSpec spec, Map<String, StreamDefinition> sds) {
        LOG.info("new metadata is updated " + spec);
        try {
            onReload(spec, sds);
        } catch (Exception ex) {
            LOG.error("error applying new SpoutSpec", ex);
        }
    }

    @Override
    public void nextTuple() {
        for (KafkaSpoutWrapper wrapper : kafkaSpoutList.values()) {
            wrapper.nextTuple();
        }
    }

    /**
     * Finds the correct wrapper to ack, which means the msgId must be mapped
     * back to its owning wrapper.
     *
     * @param msgId message id emitted by one of the wrapped KafkaSpouts
     */
    @Override
    public void ack(Object msgId) {
        // decode and get topic
        KafkaMessageIdWrapper id = (KafkaMessageIdWrapper) msgId;
        KafkaSpoutWrapper spout = kafkaSpoutList.get(id.topic);
        if (spout != null) {
            spout.ack(id.id);
        }
    }

    @Override
    public void fail(Object msgId) {
        // decode and get topic
        KafkaMessageIdWrapper id = (KafkaMessageIdWrapper) msgId;
        LOG.error("Failing message {}, with topic {}", msgId, id.topic);
        KafkaSpoutWrapper spout = kafkaSpoutList.get(id.topic);
        if (spout != null) {
            spout.fail(id.id);
        }
    }

    @Override
    public void deactivate() {
        System.out.println("deactivate");
        for (KafkaSpoutWrapper wrapper : kafkaSpoutList.values()) {
            wrapper.deactivate();
        }
    }

    @Override
    public void close() {
        System.out.println("close");
        for (KafkaSpoutWrapper wrapper : kafkaSpoutList.values()) {
            wrapper.close();
        }
    }

    private List<String> getTopics(SpoutSpec spoutSpec) {
        List<String> meta = new ArrayList<>();
        for (Kafka2TupleMetadata entry : spoutSpec.getKafka2TupleMetadataMap().values()) {
            meta.add(entry.getTopic());
        }
        return meta;
    }

    @SuppressWarnings("unchecked")
    public void onReload(final SpoutSpec newMeta, Map<String, StreamDefinition> sds) throws Exception {
        // calculate topic create/remove/update
        List<String> topics = getTopics(newMeta);
        List<String> cachedTopics = getTopics(cachedSpoutSpec);
        Collection<String> newTopics = CollectionUtils.subtract(topics, cachedTopics);
        Collection<String> removeTopics = CollectionUtils.subtract(cachedTopics, topics);
        Collection<String> updateTopics = CollectionUtils.intersection(topics, cachedTopics);
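        // Worked example: if cachedTopics = [topicA, topicB] and topics = [topicB, topicC], then
        // newTopics = [topicC] (a KafkaSpout is created), removeTopics = [topicA] (its spout is closed),
        // and updateTopics = [topicB] (the existing wrapper is updated with the new metadata).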

        LOG.info("Topics were added={}, removed={}, modified={}", newTopics, removeTopics, updateTopics);

        // build lookup table for scheme
        Map<String, String> newSchemaName = new HashMap<>();
        Map<String, Map<String, String>> dataSourceProperties = new HashMap<>();
        for (Kafka2TupleMetadata ds : newMeta.getKafka2TupleMetadataMap().values()) {
            newSchemaName.put(ds.getTopic(), ds.getSchemeCls());
            dataSourceProperties.put(ds.getTopic(), ds.getProperties());
        }

        // copy and swap
        Map<String, KafkaSpoutWrapper> newKafkaSpoutList = new HashMap<>(this.kafkaSpoutList);
        // iterate new topics and then create KafkaSpout
        for (String topic : newTopics) {
            KafkaSpoutWrapper wrapper = newKafkaSpoutList.get(topic);
            if (wrapper != null) {
                LOG.warn(MessageFormat.format(
                        "try to create new topic {0}, but found in the active spout list, this may indicate some inconsistency",
                        topic));
                continue;
            }
            KafkaSpoutWrapper newWrapper = createKafkaSpout(
                    ConfigFactory.parseMap(dataSourceProperties.get(topic)).withFallback(this.config), conf,
                    context, collector, topic, newSchemaName.get(topic), newMeta, sds);
            newKafkaSpoutList.put(topic, newWrapper);
        }
        // iterate remove topics and then close KafkaSpout
        for (String topic : removeTopics) {
            KafkaSpoutWrapper wrapper = newKafkaSpoutList.get(topic);
            if (wrapper == null) {
                LOG.warn(MessageFormat.format(
                        "try to remove topic {0}, but not found in the active spout list, this may indicate some inconsistency",
                        topic));
                continue;
            }
            removeKafkaSpout(wrapper);
            newKafkaSpoutList.remove(topic);
        }

        // iterate update topic and then update metadata
        for (String topic : updateTopics) {
            KafkaSpoutWrapper spoutWrapper = newKafkaSpoutList.get(topic);
            if (spoutWrapper == null) {
                LOG.warn(MessageFormat.format(
                        "try to update topic {0}, but not found in the active spout list, this may indicate some inconsistency",
                        topic));
                continue;
            }
            spoutWrapper.update(newMeta, sds);
        }

        // swap
        this.cachedSpoutSpec = newMeta;
        this.kafkaSpoutList = newKafkaSpoutList;
        this.sds = sds;
    }

    /**
     * This method is protected so that unit tests can override it.
     * Q: Where is consumer state persisted, i.e. which offset has been consumed for each topic and partition?
     * A: stormKafkaTransactionZkPath + "/" + consumerId + "/" + topic + "/" + topologyId + "/" + partitionId
     * Note 1: PartitionManager.committedPath composes the zkState path as _spoutConfig.zkRoot + "/" + _spoutConfig.id + "/" + _partition.getId().
     * The consumerId defaults to "eagle_consumer" unless it is specified by "spout.stormKafkaEagleConsumer".
     * Note 2: topologyId is part of the zkState path because one topic can by design be consumed by multiple topologies,
     * so each topology needs to know its own processed offset.
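     * For example, with the defaults above, the committed offsets for topic "perfmon" consumed by topology
     * "alertUnitTopology_1" live under "/consumers/eagle_consumer/perfmon/alertUnitTopology_1/" plus the
     * partition id (the topic and topology names here are illustrative).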
     * <p>TODO: Should avoid calling Config.get deep in the calling stack; a config bean should be generated as early as possible.
     * </p>
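     * <p>A minimal configuration sketch for this method (HOCON; "spout.kafkaBrokerZkQuorum" is assumed to be the
     * value of AlertConstants.KAFKA_BROKER_ZK_QUORUM, and the addresses are illustrative):</p>
     * <pre>
     * spout.kafkaBrokerZkQuorum = "zk1:2181,zk2:2181"
     * spout.stormKafkaUseSameZkQuorumWithKafkaBroker = true  # required; when false, set spout.stormKafkaTransactionZkQuorum too
     * spout.stormKafkaTransactionZkPath = "/consumers"       # optional, default shown
     * spout.stormKafkaStateUpdateIntervalMs = 2000           # optional, default shown
     * </pre>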
     *
     * @param configure     effective spout config (datasource properties overlaid on the topology config)
     * @param conf          storm topology configuration map
     * @param context       topology context
     * @param collector     spout output collector
     * @param topic         kafka topic to consume from
     * @param schemeClsName class name of the scheme used to deserialize messages
     * @param spoutSpec     spout specification metadata
     * @param sds           stream definitions keyed by streamId
     * @return an opened KafkaSpoutWrapper for the topic
     */
    @SuppressWarnings("rawtypes")
    protected KafkaSpoutWrapper createKafkaSpout(Config configure, Map conf, TopologyContext context,
            SpoutOutputCollector collector, final String topic, String schemeClsName, SpoutSpec spoutSpec,
            Map<String, StreamDefinition> sds) throws Exception {
        String kafkaBrokerZkQuorum = configure.getString(AlertConstants.KAFKA_BROKER_ZK_QUORUM);
        BrokerHosts hosts = null;
        if (configure.hasPath("spout.kafkaBrokerZkBasePath")) {
            hosts = new ZkHosts(kafkaBrokerZkQuorum, configure.getString(AlertConstants.KAFKA_BROKER_ZK_BASE_PATH));
        } else {
            hosts = new ZkHosts(kafkaBrokerZkQuorum);
        }
        String transactionZkRoot = DEFAULT_STORM_KAFKA_TRANSACTION_ZK_ROOT;
        if (configure.hasPath("spout.stormKafkaTransactionZkPath")) {
            transactionZkRoot = configure.getString("spout.stormKafkaTransactionZkPath");
        }
        boolean logEventEnabled = false;
        if (configure.hasPath("topology.logEventEnabled")) {
            logEventEnabled = configure.getBoolean("topology.logEventEnabled");
        }
        // write partition offset etc. into zkRoot+id, see PartitionManager.committedPath
        String zkStateTransactionRelPath = DEFAULT_STORM_KAFKA_TRANSACTION_ZK_RELATIVE_PATH;
        if (configure.hasPath("spout.stormKafkaEagleConsumer")) {
            zkStateTransactionRelPath = configure.getString("spout.stormKafkaEagleConsumer");
        }
        SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, transactionZkRoot,
                zkStateTransactionRelPath + "/" + topic + "/" + topologyId);
        // transaction zkServers
        boolean stormKafkaUseSameZkQuorumWithKafkaBroker = configure
                .getBoolean("spout.stormKafkaUseSameZkQuorumWithKafkaBroker");
        if (stormKafkaUseSameZkQuorumWithKafkaBroker) {
            ZkServerPortUtils utils = new ZkServerPortUtils(kafkaBrokerZkQuorum);
            spoutConfig.zkServers = utils.getZkHosts();
            spoutConfig.zkPort = utils.getZkPort();
        } else {
            ZkServerPortUtils utils = new ZkServerPortUtils(
                    configure.getString("spout.stormKafkaTransactionZkQuorum"));
            spoutConfig.zkServers = utils.getZkHosts();
            spoutConfig.zkPort = utils.getZkPort();
        }
        // transaction update interval
        spoutConfig.stateUpdateIntervalMs = configure.hasPath("spout.stormKafkaStateUpdateIntervalMs")
                ? configure.getInt("spout.stormKafkaStateUpdateIntervalMs")
                : 2000;
        // Kafka fetch size
        spoutConfig.fetchSizeBytes = configure.hasPath("spout.stormKafkaFetchSizeBytes")
                ? configure.getInt("spout.stormKafkaFetchSizeBytes")
                : 1048576; // default 1 MB
        // "startOffsetTime" is for test usage, prod should not use this
        if (configure.hasPath("spout.stormKafkaStartOffsetTime")) {
            spoutConfig.startOffsetTime = configure.getInt("spout.stormKafkaStartOffsetTime");
        }

        spoutConfig.scheme = createMultiScheme(conf, topic, schemeClsName);
        KafkaSpoutWrapper wrapper = new KafkaSpoutWrapper(spoutConfig, kafkaSpoutMetric);
        SpoutOutputCollectorWrapper collectorWrapper = new SpoutOutputCollectorWrapper(this, collector, topic,
                spoutSpec, numOfRouterBolts, sds, this.serializer, logEventEnabled);
        wrapper.open(conf, context, collectorWrapper);

        if (LOG.isInfoEnabled()) {
            LOG.info("create and open kafka wrapper: topic {}, scheme class{} ", topic, schemeClsName);
        }
        return wrapper;
    }

    private MultiScheme createMultiScheme(Map conf, String topic, String schemeClsName) throws Exception {
        Object scheme = SchemeBuilder.buildFromClsName(schemeClsName, topic, conf);
        if (scheme instanceof MultiScheme) {
            return (MultiScheme) scheme;
        } else if (scheme instanceof Scheme) {
            return new SchemeAsMultiScheme((Scheme) scheme);
        } else {
            LOG.error("create spout scheme failed.");
            throw new IllegalArgumentException("create spout scheme failed.");
        }
    }

    @Override
    public StreamDefinition getStreamDefinition(String streamId) {
        return sds.get(streamId);
    }

    /**
     * Utility to get the list of zkServers and the zkPort. (It is assumed that the zkPort is the same for all
     * zkServers, as the storm-kafka library requires this, though it is not efficient.)
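     * <p>Example (per the constructor's parsing): a quorum string "zk1.example.com:2181,zk2.example.com:2181"
     * yields zkHosts = [zk1.example.com, zk2.example.com] and zkPort = 2181; the hostnames are illustrative.</p>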
     */
    private static class ZkServerPortUtils {
        private List<String> zkHosts = new ArrayList<>();
        private Integer zkPort;

        public ZkServerPortUtils(String zkQuorum) {
            String[] zkConnections = zkQuorum.split(",");
            for (String zkConnection : zkConnections) {
                zkHosts.add(zkConnection.split(":")[0]);
            }
            zkPort = Integer.valueOf(zkConnections[0].split(":")[1]);
        }

        public List<String> getZkHosts() {
            return zkHosts;
        }

        public Integer getZkPort() {
            return zkPort;
        }
    }

    protected void removeKafkaSpout(KafkaSpoutWrapper wrapper) {
        try {
            wrapper.close();
        } catch (Exception e) {
            LOG.error("Close wrapper failed. Ignore and continue!", e);
        }
    }
}
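
Usage

The following is a minimal sketch of wiring this spout into a Storm topology using the same backtype.storm (Storm 0.x) API that the spout itself imports. The topology id, component names, parallelism, and the createNotifyService() helper are hypothetical; any IMetadataChangeNotifyService implementation can be supplied. Note that numOfRouterBolts must match the number of router bolt instances downstream so that the streams declared in declareOutputFields() line up with their subscribers.

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import com.typesafe.config.ConfigFactory;
import org.apache.eagle.alert.engine.coordinator.IMetadataChangeNotifyService;
import org.apache.eagle.alert.engine.spout.CorrelationSpout;

public class CorrelationSpoutExample {
    public static void main(String[] args) {
        // load the application configuration (application.conf on the classpath)
        com.typesafe.config.Config appConfig = ConfigFactory.load();

        String topologyId = "alertUnitTopology_1"; // illustrative topology id
        int numOfRouterBolts = 4;                  // must match the router bolt parallelism downstream

        // substitute a concrete IMetadataChangeNotifyService for your deployment
        IMetadataChangeNotifyService notifyService = createNotifyService(appConfig);

        CorrelationSpout spout = new CorrelationSpout(appConfig, topologyId, notifyService, numOfRouterBolts);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("alertEngineSpout", spout, 1);
        // ... attach numOfRouterBolts router bolts here, each subscribing to the stream that
        // declareOutputFields() declared for its index, then submit the topology.

        new LocalCluster().submitTopology(topologyId, new Config(), builder.createTopology());
    }

    private static IMetadataChangeNotifyService createNotifyService(com.typesafe.config.Config config) {
        // placeholder: construct whichever IMetadataChangeNotifyService implementation you use
        throw new UnsupportedOperationException("provide a concrete IMetadataChangeNotifyService");
    }
}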