org.apache.samza.config.KafkaConsumerConfig.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.samza.config.KafkaConsumerConfig.java

Source

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */

package org.apache.samza.config;

import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.RangeAssignor;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.samza.SamzaException;
import org.apache.samza.execution.JobPlanner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;

/**
 * The configuration class for KafkaConsumer
 */
public class KafkaConsumerConfig extends HashMap<String, Object> {

    public static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerConfig.class);

    public static final String ZOOKEEPER_CONNECT = "zookeeper.connect";
    private static final int FETCH_MAX_BYTES = 1024 * 1024;

    private final String systemName;
    /*
     * By default, KafkaConsumer will fetch some big number of available messages for all the partitions.
     * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
     */
    static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";

    private KafkaConsumerConfig(Map<String, Object> props, String systemName) {
        super(props);
        this.systemName = systemName;
    }

    /**
     * Create kafka consumer configs, based on the subset of global configs.
     * @param config application config
     * @param systemName system name
     * @param clientId client id provided by the caller
     * @return KafkaConsumerConfig
     */
    public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config, String systemName,
            String clientId) {

        Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);

        final String groupId = createConsumerGroupId(config);

        Map<String, Object> consumerProps = new HashMap<>(subConf);

        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, clientId);

        // These are values we enforce in sazma, and they cannot be overwritten.

        // Disable consumer auto-commit because Samza controls commits
        consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");

        // check if samza default offset value is defined
        String systemOffsetDefault = new SystemConfig(config).getSystemOffsetDefault(systemName);

        // Translate samza config value to kafka config value
        String autoOffsetReset = getAutoOffsetResetValue(
                (String) consumerProps.get(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG), systemOffsetDefault);
        LOG.info("setting auto.offset.reset for system {} to {}", systemName, autoOffsetReset);
        consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, autoOffsetReset);

        // if consumer bootstrap servers are not configured, get them from the producer configs
        if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
            String bootstrapServers = config.get(
                    String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
            if (StringUtils.isEmpty(bootstrapServers)) {
                throw new SamzaException(
                        "Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config  for " + systemName);
            }
            consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        }

        // Always use default partition assignment strategy. Do not allow override.
        consumerProps.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, RangeAssignor.class.getName());

        // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
        // default to byte[]
        if (!consumerProps.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
            LOG.info("setting key serialization for the consumer(for system {}) to ByteArrayDeserializer",
                    systemName);
            consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        }
        if (!consumerProps.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
            LOG.info("setting value serialization for the consumer(for system {}) to ByteArrayDeserializer",
                    systemName);
            consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                    ByteArrayDeserializer.class.getName());
        }

        // Override default max poll config if there is no value
        consumerProps.putIfAbsent(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);

        return new KafkaConsumerConfig(consumerProps, systemName);
    }

    public String getClientId() {
        String clientId = (String) get(ConsumerConfig.CLIENT_ID_CONFIG);
        if (StringUtils.isBlank(clientId)) {
            throw new SamzaException("client Id is not set for consumer for system=" + systemName);
        }
        return clientId;
    }

    public int fetchMessageMaxBytes() {
        String fetchSize = (String) get("fetch.message.max.bytes");
        if (StringUtils.isBlank(fetchSize)) {
            return FETCH_MAX_BYTES;
        } else {
            return Integer.valueOf(fetchSize);
        }
    }

    public String getZkConnect() {
        return (String) get(ZOOKEEPER_CONNECT);
    }

    // group id should be unique per job
    static String createConsumerGroupId(Config config) {
        Pair<String, String> jobNameId = getJobNameAndId(config);

        return String.format("%s-%s", jobNameId.getLeft(), jobNameId.getRight());
    }

    // client id should be unique per job
    public static String createClientId(String prefix, Config config) {

        Pair<String, String> jobNameId = getJobNameAndId(config);
        String jobName = jobNameId.getLeft();
        String jobId = jobNameId.getRight();
        return String.format("%s-%s-%s", prefix.replaceAll("\\W", "_"), jobName.replaceAll("\\W", "_"),
                jobId.replaceAll("\\W", "_"));
    }

    public static Pair<String, String> getJobNameAndId(Config config) {
        JobConfig jobConfig = new JobConfig(JobPlanner.generateSingleJobConfig(config));
        Option jobNameOption = jobConfig.getName();
        if (jobNameOption.isEmpty()) {
            throw new ConfigException("Missing job name");
        }
        String jobName = (String) jobNameOption.get();
        return new ImmutablePair<>(jobName, jobConfig.getJobId());
    }

    /**
     * If settings for Kafka Consumer auto.offset.reset is set - use it.
     * If this setting is using the old (deprecated values) - translate them:
     * "largest" -> "latest"
     * "smallest" -> "earliest"
     *
     * If no setting specified - match it to the Samza default offset setting.
     * If none defined - return "latest"
     * @param autoOffsetReset consumer.auto.offset.reset config
     * @param samzaOffsetDefault  samza system default
     * @return String representing the config value for "auto.offset.reset" property
     */
    static String getAutoOffsetResetValue(final String autoOffsetReset, final String samzaOffsetDefault) {
        // valid kafka consumer values
        final String KAFKA_OFFSET_LATEST = "latest";
        final String KAFKA_OFFSET_EARLIEST = "earliest";
        final String KAFKA_OFFSET_NONE = "none";

        // if the value for KafkaConsumer is set - use it.
        if (!StringUtils.isBlank(autoOffsetReset)) {
            if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) || autoOffsetReset.equals(KAFKA_OFFSET_LATEST)
                    || autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
                return autoOffsetReset;
            }
            // translate old kafka consumer values into new ones (SAMZA-1987 top remove it)
            String newAutoOffsetReset;
            switch (autoOffsetReset) {
            case "largest":
                newAutoOffsetReset = KAFKA_OFFSET_LATEST;
                LOG.warn(
                        "Using old (deprecated) value for kafka consumer config auto.offset.reset = {}. The right value should be {}",
                        autoOffsetReset, KAFKA_OFFSET_LATEST);
                break;
            case "smallest":
                newAutoOffsetReset = KAFKA_OFFSET_EARLIEST;
                LOG.warn(
                        "Using old (deprecated) value for kafka consumer config auto.offset.reset = {}. The right value should be {}",
                        autoOffsetReset, KAFKA_OFFSET_EARLIEST);
                break;
            default:
                throw new SamzaException("Using invalid value for kafka consumer config auto.offset.reset "
                        + autoOffsetReset + ". See KafkaConsumer config for the correct values.");
            }

            LOG.info("Auto offset reset value converted from {} to {}", autoOffsetReset, newAutoOffsetReset);
            return newAutoOffsetReset;
        }

        // in case kafka consumer configs are not provided we should match them to Samza's ones.
        String newAutoOffsetReset = KAFKA_OFFSET_LATEST;
        if (!StringUtils.isBlank(samzaOffsetDefault)) {
            switch (samzaOffsetDefault) {
            case SystemConfig.SAMZA_SYSTEM_OFFSET_UPCOMING:
                newAutoOffsetReset = KAFKA_OFFSET_LATEST;
                break;
            case SystemConfig.SAMZA_SYSTEM_OFFSET_OLDEST:
                newAutoOffsetReset = KAFKA_OFFSET_EARLIEST;
                break;
            default:
                throw new SamzaException("Using invalid value for samza default offset config " + autoOffsetReset
                        + ". See samza config for the correct values");
            }
            LOG.info("Auto offset reset value for KafkaConsumer for system {} converted from {}(samza) to {}",
                    samzaOffsetDefault, newAutoOffsetReset);
        }

        return newAutoOffsetReset;
    }
}