org.apache.metron.parsers.topology.ParserTopologyBuilder.java Source code

Java tutorial

Introduction

Below is the full source code of the class `org.apache.metron.parsers.topology.ParserTopologyBuilder` (file `ParserTopologyBuilder.java`).

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.parsers.topology;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.apache.curator.framework.CuratorFramework;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.metron.common.Constants;
import org.apache.metron.common.configuration.ConfigurationsUtils;
import org.apache.metron.common.configuration.ParserConfigurations;
import org.apache.metron.common.configuration.SensorParserConfig;
import org.apache.metron.common.configuration.writer.ParserWriterConfiguration;
import org.apache.metron.common.utils.KafkaUtils;
import org.apache.metron.common.utils.ReflectionUtils;
import org.apache.metron.common.writer.BulkMessageWriter;
import org.apache.metron.common.writer.MessageWriter;
import org.apache.metron.parsers.bolt.ParserBolt;
import org.apache.metron.parsers.bolt.WriterBolt;
import org.apache.metron.parsers.bolt.WriterHandler;
import org.apache.metron.parsers.filters.Filters;
import org.apache.metron.parsers.interfaces.MessageFilter;
import org.apache.metron.parsers.interfaces.MessageParser;
import org.apache.metron.parsers.topology.config.ValueSupplier;
import org.apache.metron.storm.kafka.flux.SimpleStormKafkaBuilder;
import org.apache.metron.storm.kafka.flux.SpoutConfiguration;
import org.apache.metron.storm.kafka.flux.StormKafkaSpout;
import org.apache.metron.writer.AbstractWriter;
import org.apache.metron.writer.kafka.KafkaWriter;
import org.apache.storm.Config;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.BoltDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.json.simple.JSONObject;

/**
 * Builds a Storm topology that parses telemetry data received from a sensor.
 */
public class ParserTopologyBuilder {

    /**
     * Immutable value object pairing the {@link TopologyBuilder} for a parser
     * topology with the {@link Config} that should be submitted alongside it.
     */
    public static class ParserTopology {
        private final TopologyBuilder builder;
        private final Config topologyConfig;

        /**
         * @param builder        The assembled topology.
         * @param topologyConfig The Storm configuration to submit with it.
         */
        private ParserTopology(TopologyBuilder builder, Config topologyConfig) {
            this.builder = builder;
            this.topologyConfig = topologyConfig;
        }

        /** @return The assembled topology builder. */
        public TopologyBuilder getBuilder() {
            return builder;
        }

        /** @return The Storm configuration to submit with the topology. */
        public Config getTopologyConfig() {
            return topologyConfig;
        }
    }

    /**
     * Builds a Storm topology that parses telemetry data received from an external sensor.
     *
     * <p>One Kafka spout is created per sensor type; all spouts feed a single shared
     * parser bolt, which in turn feeds an optional shared error-writer bolt.
     *
     * @param zookeeperUrl                   Zookeeper URL
     * @param brokerUrl                      Kafka Broker URL
     * @param sensorTypes                    Types of the sensors to consume from
     * @param spoutParallelismSupplier       Supplier for the parallelism hints for the spouts (one entry per sensor)
     * @param spoutNumTasksSupplier          Supplier for the number of tasks for the spouts (one entry per sensor)
     * @param parserParallelismSupplier      Supplier for the parallelism hint for the parser bolt
     * @param parserNumTasksSupplier         Supplier for the number of tasks for the parser bolt
     * @param errorWriterParallelismSupplier Supplier for the parallelism hint for the bolt that handles errors
     * @param errorWriterNumTasksSupplier    Supplier for the number of tasks for the bolt that handles errors
     * @param kafkaSpoutConfigSupplier       Supplier for the configuration options for the kafka spouts (one entry per sensor)
     * @param securityProtocolSupplier       Supplier for the security protocol
     * @param outputTopicSupplier            Supplier for the output kafka topic
     * @param errorTopicSupplier             Supplier for the error kafka topic
     * @param stormConfigSupplier            Supplier for the storm config
     * @return A Storm topology that parses telemetry data received from an external sensor
     * @throws Exception if the parser configuration cannot be fetched from Zookeeper
     */
    public static ParserTopology build(String zookeeperUrl, Optional<String> brokerUrl, List<String> sensorTypes,
            ValueSupplier<List> spoutParallelismSupplier, ValueSupplier<List> spoutNumTasksSupplier,
            ValueSupplier<Integer> parserParallelismSupplier, ValueSupplier<Integer> parserNumTasksSupplier,
            ValueSupplier<Integer> errorWriterParallelismSupplier,
            ValueSupplier<Integer> errorWriterNumTasksSupplier, ValueSupplier<List> kafkaSpoutConfigSupplier,
            ValueSupplier<String> securityProtocolSupplier, ValueSupplier<String> outputTopicSupplier,
            ValueSupplier<String> errorTopicSupplier, ValueSupplier<Config> stormConfigSupplier) throws Exception {

        // fetch configuration from zookeeper
        ParserConfigurations configs = new ParserConfigurations();
        Map<String, SensorParserConfig> sensorToParserConfigs = getSensorParserConfig(zookeeperUrl, sensorTypes,
                configs);
        Collection<SensorParserConfig> parserConfigs = sensorToParserConfigs.values();

        // resolve every configurable value; the suppliers may consult the parser configs
        @SuppressWarnings("unchecked")
        List<Integer> spoutParallelism = (List<Integer>) spoutParallelismSupplier.get(parserConfigs, List.class);
        @SuppressWarnings("unchecked")
        List<Integer> spoutNumTasks = (List<Integer>) spoutNumTasksSupplier.get(parserConfigs, List.class);
        int parserParallelism = parserParallelismSupplier.get(parserConfigs, Integer.class);
        int parserNumTasks = parserNumTasksSupplier.get(parserConfigs, Integer.class);
        int errorWriterParallelism = errorWriterParallelismSupplier.get(parserConfigs, Integer.class);
        int errorWriterNumTasks = errorWriterNumTasksSupplier.get(parserConfigs, Integer.class);
        String outputTopic = outputTopicSupplier.get(parserConfigs, String.class);

        @SuppressWarnings("unchecked")
        List<Map<String, Object>> kafkaSpoutConfig = kafkaSpoutConfigSupplier.get(parserConfigs, List.class);
        Optional<String> securityProtocol = Optional
                .ofNullable(securityProtocolSupplier.get(parserConfigs, String.class));

        // create one kafka spout per sensor
        TopologyBuilder builder = new TopologyBuilder();
        int i = 0;
        List<String> spoutIds = new ArrayList<>();
        for (Entry<String, SensorParserConfig> entry : sensorToParserConfigs.entrySet()) {
            // guard against a spout-config list shorter than the sensor list; a missing
            // entry falls back to the spout defaults instead of throwing IndexOutOfBoundsException
            Map<String, Object> spoutConfig = i < kafkaSpoutConfig.size() ? kafkaSpoutConfig.get(i) : null;
            KafkaSpout kafkaSpout = createKafkaSpout(zookeeperUrl, entry.getKey(), securityProtocol,
                    Optional.ofNullable(spoutConfig), entry.getValue());
            // suffix the spout id with the sensor type only when there are multiple spouts
            String spoutId = sensorToParserConfigs.size() > 1 ? "kafkaSpout-" + entry.getKey() : "kafkaSpout";
            builder.setSpout(spoutId, kafkaSpout, spoutParallelism.get(i)).setNumTasks(spoutNumTasks.get(i));
            spoutIds.add(spoutId);
            ++i;
        }

        // create the single parser bolt shared by all spouts
        ParserBolt parserBolt = createParserBolt(zookeeperUrl, brokerUrl, sensorToParserConfigs, securityProtocol,
                configs, Optional.ofNullable(outputTopic));

        BoltDeclarer boltDeclarer = builder.setBolt("parserBolt", parserBolt, parserParallelism)
                .setNumTasks(parserNumTasks);

        for (String spoutId : spoutIds) {
            boltDeclarer.localOrShuffleGrouping(spoutId);
        }

        // create the error bolt, if needed
        // NOTE(review): the shared error bolt is configured from the first sensor only;
        // all sensors' errors flow through it — confirm this is the intended behavior
        if (errorWriterNumTasks > 0) {
            String errorTopic = errorTopicSupplier.get(parserConfigs, String.class);
            WriterBolt errorBolt = createErrorBolt(zookeeperUrl, brokerUrl, sensorTypes.get(0), securityProtocol,
                    configs, parserConfigs.iterator().next(), errorTopic);
            builder.setBolt("errorMessageWriter", errorBolt, errorWriterParallelism)
                    .setNumTasks(errorWriterNumTasks).localOrShuffleGrouping("parserBolt", Constants.ERROR_STREAM);
        }

        return new ParserTopology(builder, stormConfigSupplier.get(parserConfigs, Config.class));
    }

    /**
     * Creates a spout that consumes tuples from a Kafka topic.
     *
     * @param zkQuorum            Zookeeper URL
     * @param sensorType          Type of sensor; used as the input topic when the parser config names none
     * @param securityProtocol    Optional security protocol for the Kafka consumer
     * @param kafkaConfigOptional Configuration options for the kafka spout
     * @param parserConfig        Configuration for the parser
     * @return A Kafka spout reading from the sensor's input topic
     */
    private static StormKafkaSpout<Object, Object> createKafkaSpout(String zkQuorum, String sensorType,
            Optional<String> securityProtocol, Optional<Map<String, Object>> kafkaConfigOptional,
            SensorParserConfig parserConfig) {

        // the input topic defaults to the sensor type unless explicitly configured
        String topic = parserConfig.getSensorTopic() == null ? sensorType : parserConfig.getSensorTopic();

        // seed the spout options with sensible defaults; explicitly supplied settings win
        Map<String, Object> spoutOptions = kafkaConfigOptional.orElseGet(HashMap::new);
        spoutOptions.putIfAbsent(SpoutConfiguration.FIRST_POLL_OFFSET_STRATEGY.key,
                KafkaSpoutConfig.FirstPollOffsetStrategy.UNCOMMITTED_EARLIEST.name());
        spoutOptions.putIfAbsent(ConsumerConfig.GROUP_ID_CONFIG, topic + "_parser");
        securityProtocol.ifPresent(protocol -> spoutOptions.putIfAbsent("security.protocol",
                KafkaUtils.INSTANCE.normalizeProtocol(protocol)));

        // the spout emits the message value, key, and originating topic as separate fields
        List<String> emittedFields = Arrays.asList(
                SimpleStormKafkaBuilder.FieldsConfiguration.VALUE.getFieldName(),
                SimpleStormKafkaBuilder.FieldsConfiguration.KEY.getFieldName(),
                SimpleStormKafkaBuilder.FieldsConfiguration.TOPIC.getFieldName());
        return SimpleStormKafkaBuilder.create(topic, zkQuorum, emittedFields, spoutOptions);
    }

    /**
     * Creates a Kafka writer.
     *
     * @param broker           An optional URL to the Kafka brokers.
     * @param zkQuorum         The URL to Zookeeper.
     * @param securityProtocol An optional security protocol in use.
     * @return A writer that publishes messages to Kafka.
     */
    private static KafkaWriter createKafkaWriter(Optional<String> broker, String zkQuorum,
            Optional<String> securityProtocol) {
        KafkaWriter writer = new KafkaWriter();

        // locate the cluster either directly via the brokers or indirectly via zookeeper
        if (broker.isPresent()) {
            writer.withBrokerUrl(broker.get());
        } else {
            writer.withZkQuorum(zkQuorum);
        }

        // pass the security protocol through to the underlying producer, if one was given
        securityProtocol.ifPresent(protocol -> {
            HashMap<String, Object> producerConfig = new HashMap<>();
            producerConfig.put("security.protocol", protocol);
            writer.withProducerConfigs(producerConfig);
        });

        return writer;
    }

    /**
     * Creates the bolt that parses raw input from every sensor.
     *
     * @param zookeeperUrl             Zookeeper URL
     * @param brokerUrl                Kafka Broker URL
     * @param sensorTypeToParserConfig Parser configuration keyed by sensor type
     * @param securityProtocol         Optional security protocol for the default Kafka writer
     * @param configs                  The current parser configurations
     * @param outputTopic              Optional topic overriding the default output topic
     * @return A Storm bolt that parses input from a sensor
     */
    private static ParserBolt createParserBolt(String zookeeperUrl, Optional<String> brokerUrl,
            Map<String, SensorParserConfig> sensorTypeToParserConfig, Optional<String> securityProtocol,
            ParserConfigurations configs, Optional<String> outputTopic) {

        Map<String, ParserComponents> componentsBySensor = new HashMap<>();
        for (Entry<String, SensorParserConfig> sensorEntry : sensorTypeToParserConfig.entrySet()) {
            String sensorType = sensorEntry.getKey();
            SensorParserConfig parserConfig = sensorEntry.getValue();

            // instantiate and configure the message parser
            MessageParser<JSONObject> parser = ReflectionUtils.createInstance(parserConfig.getParserClassName());
            parser.configure(parserConfig.getParserConfig());

            // the message filter is optional; null means no filtering
            MessageFilter<JSONObject> filter = StringUtils.isEmpty(parserConfig.getFilterClassName())
                    ? null
                    : Filters.get(parserConfig.getFilterClassName(), parserConfig.getParserConfig());

            // choose the writer; if none is configured, default to Kafka on the enrichment topic
            AbstractWriter writer;
            if (parserConfig.getWriterClassName() == null) {
                writer = createKafkaWriter(brokerUrl, zookeeperUrl, securityProtocol)
                        .withTopic(outputTopic.orElse(Constants.ENRICHMENT_TOPIC));
            } else {
                writer = ReflectionUtils.createInstance(parserConfig.getWriterClassName());
            }
            writer.configure(sensorType, new ParserWriterConfiguration(configs));

            // bundle parser, filter, and writer handler for this sensor
            componentsBySensor.put(sensorType,
                    new ParserComponents(parser, filter, createWriterHandler(writer)));
        }

        return new ParserBolt(zookeeperUrl, componentsBySensor);
    }

    /**
     * Creates a bolt that handles error messages.
     *
     * @param zookeeperUrl     Kafka zookeeper URL
     * @param brokerUrl        Kafka Broker URL
     * @param sensorType       Type of sensor that is being consumed.
     * @param securityProtocol Security protocol used (if any)
     * @param configs          The current parser configurations
     * @param parserConfig     The sensor's parser configuration.
     * @param errorTopic       The topic to write errors to; when null, falls back to the
     *                         global-config value under {@link Constants#PARSER_ERROR_TOPIC_GLOBALS_KEY}
     * @return A Storm bolt that handles error messages.
     */
    private static WriterBolt createErrorBolt(String zookeeperUrl, Optional<String> brokerUrl, String sensorType,
            Optional<String> securityProtocol, ParserConfigurations configs, SensorParserConfig parserConfig,
            String errorTopic) {

        // create a writer
        AbstractWriter writer;
        if (parserConfig.getErrorWriterClassName() == null) {

            if (errorTopic == null) {
                errorTopic = (String) configs.getGlobalConfig().get(Constants.PARSER_ERROR_TOPIC_GLOBALS_KEY);
            }

            // if not configured, uses a sensible default
            writer = createKafkaWriter(brokerUrl, zookeeperUrl, securityProtocol).withTopic(errorTopic)
                    .withConfigPrefix("error");

        } else {
            // BUGFIX: instantiate the configured *error* writer class; the original code
            // instantiated getWriterClassName(), silently ignoring the error-writer setting
            writer = ReflectionUtils.createInstance(parserConfig.getErrorWriterClassName());
        }

        // configure it
        writer.configure(sensorType, new ParserWriterConfiguration(configs));

        // create a writer handler
        WriterHandler writerHandler = createWriterHandler(writer);

        return new WriterBolt(writerHandler, configs, sensorType).withErrorType(Constants.ErrorType.PARSER_ERROR);
    }

    /**
     * Fetches the parser configuration for each sensor from Zookeeper.
     *
     * @param zookeeperUrl Zookeeper URL
     * @param sensorTypes  Types of sensor
     * @param configs      Receives the parser configurations loaded from Zookeeper
     * @return The parser configuration for each sensor, keyed by sensor type
     * @throws Exception if Zookeeper cannot be reached, or
     *                   {@link IllegalStateException} if a sensor has no configuration
     */
    private static Map<String, SensorParserConfig> getSensorParserConfig(String zookeeperUrl,
            List<String> sensorTypes, ParserConfigurations configs) throws Exception {
        Map<String, SensorParserConfig> configsByType = new HashMap<>();
        // the curator client is closed automatically once the configs have been read
        try (CuratorFramework client = ConfigurationsUtils.getClient(zookeeperUrl)) {
            client.start();
            ConfigurationsUtils.updateParserConfigsFromZookeeper(configs, client);
            for (String sensorType : sensorTypes) {
                SensorParserConfig sensorConfig = configs.getSensorParserConfig(sensorType);
                if (sensorConfig == null) {
                    // a missing configuration is fatal: the topology cannot parse this sensor
                    throw new IllegalStateException("Cannot find the parser configuration in zookeeper for "
                            + sensorType + "."
                            + "  Please check that it exists in zookeeper by using the 'zk_load_configs.sh -m DUMP' command.");
                }
                configsByType.put(sensorType, sensorConfig);
            }
        }
        return configsByType;
    }

    /**
     * Wraps a writer in a {@link WriterHandler}, dispatching on which writer
     * interface the instance implements.
     *
     * @param writer The writer.
     * @return A WriterHandler
     * @throws IllegalStateException if the writer is neither a {@code MessageWriter}
     *                               nor a {@code BulkMessageWriter}
     */
    @SuppressWarnings("unchecked")
    private static WriterHandler createWriterHandler(AbstractWriter writer) {
        // bulk writers take precedence when a writer implements both interfaces
        if (writer instanceof BulkMessageWriter) {
            return new WriterHandler((BulkMessageWriter<JSONObject>) writer);
        }
        if (writer instanceof MessageWriter) {
            return new WriterHandler((MessageWriter<JSONObject>) writer);
        }
        throw new IllegalStateException(
                "Unable to create parser bolt: writer must be a MessageWriter or a BulkMessageWriter");
    }
}