/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.connectors.kafka;

import kafka.admin.AdminUtils;
import kafka.api.PartitionMetadata;
import kafka.common.KafkaException;
import kafka.network.SocketServer;
import kafka.server.KafkaConfig;
import kafka.server.KafkaServer;
import kafka.utils.SystemTime$;
import kafka.utils.ZkUtils;
import org.I0Itec.zkclient.ZkClient;
import org.apache.commons.io.FileUtils;
import org.apache.curator.test.TestingServer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.operators.StreamSink;
import org.apache.flink.streaming.connectors.kafka.partitioner.KafkaPartitioner;
import org.apache.flink.streaming.connectors.kafka.testutils.ZooKeeperStringSerializer;
import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema;
import org.apache.flink.util.NetUtils;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.protocol.SecurityProtocol;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.Seq;

import java.io.File;
import java.net.BindException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;

import static org.apache.flink.util.NetUtils.hostAndPortToUrlString;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * An implementation of the KafkaServerProvider for Kafka 0.9.
 */
public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {

	protected static final Logger LOG = LoggerFactory.getLogger(KafkaTestEnvironmentImpl.class);

	private File tmpZkDir;
	private File tmpKafkaParent;
	private List<File> tmpKafkaDirs;
	private List<KafkaServer> brokers;
	private TestingServer zookeeper;
	private String zookeeperConnectionString;
	private String brokerConnectionString = "";
	private Properties standardProps;
	private Properties additionalServerProperties;
	private boolean secureMode = false;

	// 6 seconds is the default. Seems to be too small for Travis, so use 30 seconds.
	private String zkTimeout = "30000";

	public String getBrokerConnectionString() {
		return brokerConnectionString;
	}

	@Override
	public Properties getStandardProperties() {
		return standardProps;
	}

	@Override
	public String getVersion() {
		return "0.9";
	}

	@Override
	public List<KafkaServer> getBrokers() {
		return brokers;
	}

	@Override
	public <T> FlinkKafkaConsumerBase<T> getConsumer(List<String> topics, KeyedDeserializationSchema<T> readSchema, Properties props) {
		return new FlinkKafkaConsumer09<>(topics, readSchema, props);
	}

	@Override
	public <T> StreamSink<T> getProducerSink(String topic, KeyedSerializationSchema<T> serSchema, Properties props, KafkaPartitioner<T> partitioner) {
		FlinkKafkaProducer09<T> prod = new FlinkKafkaProducer09<>(topic, serSchema, props, partitioner);
		prod.setFlushOnCheckpoint(true);
		return new StreamSink<>(prod);
	}

	@Override
	public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, KafkaPartitioner<T> partitioner) {
		FlinkKafkaProducer09<T> prod = new FlinkKafkaProducer09<>(topic, serSchema, props, partitioner);
		prod.setFlushOnCheckpoint(true);
		return stream.addSink(prod);
	}

	@Override
	public KafkaOffsetHandler createOffsetHandler() {
		return new KafkaOffsetHandlerImpl();
	}

	@Override
	public void restartBroker(int leaderId) throws Exception {
		brokers.set(leaderId, getKafkaServer(leaderId, tmpKafkaDirs.get(leaderId)));
	}

	@Override
	public int getLeaderToShutDown(String topic) throws Exception {
		ZkUtils zkUtils = getZkUtils();
		try {
			PartitionMetadata firstPart = null;
			do {
				if (firstPart != null) {
					LOG.info("Unable to find leader. error code {}", firstPart.errorCode());
					// not the first try. Sleep a bit
					Thread.sleep(150);
				}

				Seq<PartitionMetadata> partitionMetadata = AdminUtils.fetchTopicMetadataFromZk(topic, zkUtils).partitionsMetadata();
				firstPart = partitionMetadata.head();
			}
			while (firstPart.errorCode() != 0);

			return firstPart.leader().get().id();
		} finally {
			zkUtils.close();
		}
	}

	@Override
	public int getBrokerId(KafkaServer server) {
		return server.config().brokerId();
	}

	@Override
	public boolean isSecureRunSupported() {
		return true;
	}

	@Override
	public void prepare(int numKafkaServers, Properties additionalServerProperties, boolean secureMode) {
		// increase the timeout, since on Travis the ZK connection takes a long time for secure connections
		if (secureMode) {
			// run only one Kafka server to avoid multiple ZK connections from many instances - Travis timeout
			numKafkaServers = 1;
			zkTimeout = String.valueOf(Integer.parseInt(zkTimeout) * 15);
		}
		this.additionalServerProperties = additionalServerProperties;
		this.secureMode = secureMode;

		File tempDir = new File(System.getProperty("java.io.tmpdir"));

		tmpZkDir = new File(tempDir, "kafkaITcase-zk-dir-" + (UUID.randomUUID().toString()));
		assertTrue("cannot create zookeeper temp dir", tmpZkDir.mkdirs());

		tmpKafkaParent = new File(tempDir, "kafkaITcase-kafka-dir-" + (UUID.randomUUID().toString()));
		assertTrue("cannot create kafka temp dir", tmpKafkaParent.mkdirs());

		tmpKafkaDirs = new ArrayList<>(numKafkaServers);
		for (int i = 0; i < numKafkaServers; i++) {
			File tmpDir = new File(tmpKafkaParent, "server-" + i);
			assertTrue("cannot create kafka temp dir", tmpDir.mkdir());
			tmpKafkaDirs.add(tmpDir);
		}

		zookeeper = null;
		brokers = null;

		try {
			LOG.info("Starting Zookeeper");
			zookeeper = new TestingServer(-1, tmpZkDir);
			zookeeperConnectionString = zookeeper.getConnectString();
			LOG.info("zookeeperConnectionString: {}", zookeeperConnectionString);

			LOG.info("Starting KafkaServer");
			brokers = new ArrayList<>(numKafkaServers);

			for (int i = 0; i < numKafkaServers; i++) {
				brokers.add(getKafkaServer(i, tmpKafkaDirs.get(i)));

				SocketServer socketServer = brokers.get(i).socketServer();
				if (secureMode) {
					brokerConnectionString += hostAndPortToUrlString(KafkaTestEnvironment.KAFKA_HOST, socketServer.boundPort(SecurityProtocol.SASL_PLAINTEXT)) + ",";
				} else {
					brokerConnectionString += hostAndPortToUrlString(KafkaTestEnvironment.KAFKA_HOST, socketServer.boundPort(SecurityProtocol.PLAINTEXT)) + ",";
				}
			}

			LOG.info("ZK and KafkaServer started.");
		} catch (Throwable t) {
			t.printStackTrace();
			fail("Test setup failed: " + t.getMessage());
		}

		LOG.info("brokerConnectionString --> {}", brokerConnectionString);

		standardProps = new Properties();
		standardProps.setProperty("zookeeper.connect", zookeeperConnectionString);
		standardProps.setProperty("bootstrap.servers", brokerConnectionString);
		standardProps.setProperty("group.id", "flink-tests");
		standardProps.setProperty("enable.auto.commit", "false");
		standardProps.setProperty("zookeeper.session.timeout.ms", zkTimeout);
		standardProps.setProperty("zookeeper.connection.timeout.ms", zkTimeout);
		standardProps.setProperty("auto.offset.reset", "earliest"); // read from the beginning ("earliest" is the Kafka 0.9 value)
		standardProps.setProperty("max.partition.fetch.bytes", "256"); // make a lot of fetches (MESSAGES MUST BE SMALLER!)
	}

	@Override
	public void shutdown() {
		for (KafkaServer broker : brokers) {
			if (broker != null) {
				broker.shutdown();
			}
		}
		brokers.clear();

		if (zookeeper != null) {
			try {
				zookeeper.stop();
				zookeeper.close();
			} catch (Exception e) {
				LOG.warn("ZK.stop() failed", e);
			}
			zookeeper = null;
		}

		// clean up the temp spaces
		if (tmpKafkaParent != null && tmpKafkaParent.exists()) {
			try {
				FileUtils.deleteDirectory(tmpKafkaParent);
			} catch (Exception e) {
				// ignore
			}
		}
		if (tmpZkDir != null && tmpZkDir.exists()) {
			try {
				FileUtils.deleteDirectory(tmpZkDir);
			} catch (Exception e) {
				// ignore
			}
		}
	}

	public ZkUtils getZkUtils() {
		LOG.info("In getZkUtils: zookeeperConnectionString = {}", zookeeperConnectionString);
		ZkClient creator = new ZkClient(zookeeperConnectionString, Integer.valueOf(standardProps.getProperty("zookeeper.session.timeout.ms")),
				Integer.valueOf(standardProps.getProperty("zookeeper.connection.timeout.ms")), new ZooKeeperStringSerializer());
		return ZkUtils.apply(creator, false);
	}

	@Override
	public void createTestTopic(String topic, int numberOfPartitions, int replicationFactor, Properties topicConfig) {
		// create topic with one client
		LOG.info("Creating topic {}", topic);

		ZkUtils zkUtils = getZkUtils();
		try {
			AdminUtils.createTopic(zkUtils, topic, numberOfPartitions, replicationFactor, topicConfig);
		} finally {
			zkUtils.close();
		}

		LOG.info("Topic {} create request is successfully posted", topic);

		// validate that the topic has been created
		final long deadline = System.nanoTime() + Integer.parseInt(zkTimeout) * 1_000_000L;
		do {
			try {
				if (secureMode) {
					// increase the wait time since ZK timeouts occur frequently on Travis
					int wait = Integer.parseInt(zkTimeout) / 100;
					LOG.info("waiting for {} msecs before the topic {} can be checked", wait, topic);
					Thread.sleep(wait);
				} else {
					Thread.sleep(100);
				}
			} catch (InterruptedException e) {
				// restore interrupted state
			}
			// we could use AdminUtils.topicExists(zkUtils, topic) here, but its results are
			// not always correct.

			LOG.info("Validating if the topic {} has been created or not", topic);

			// create a new ZK utils connection
			ZkUtils checkZKConn = getZkUtils();
			if (AdminUtils.topicExists(checkZKConn, topic)) {
				LOG.info("topic {} has been created successfully", topic);
				checkZKConn.close();
				return;
			}
			LOG.info("topic {} has not been created yet. Will check again...", topic);
			checkZKConn.close();
		}
		while (System.nanoTime() < deadline);
		fail("Test topic could not be created");
	}

	@Override
	public void deleteTestTopic(String topic) {
		ZkUtils zkUtils = getZkUtils();
		try {
			LOG.info("Deleting topic {}", topic);

			ZkClient zk = new ZkClient(zookeeperConnectionString, Integer.valueOf(standardProps.getProperty("zookeeper.session.timeout.ms")),
					Integer.valueOf(standardProps.getProperty("zookeeper.connection.timeout.ms")), new ZooKeeperStringSerializer());

			AdminUtils.deleteTopic(zkUtils, topic);

			zk.close();
		} finally {
			zkUtils.close();
		}
	}

	/**
	 * Copied from com.github.sakserv.minicluster.KafkaLocalBrokerIntegrationTest (ASL licensed).
	 */
	protected KafkaServer getKafkaServer(int brokerId, File tmpFolder) throws Exception {
		Properties kafkaProperties = new Properties();

		// properties have to be Strings
		kafkaProperties.put("advertised.host.name", KAFKA_HOST);
		kafkaProperties.put("broker.id", Integer.toString(brokerId));
		kafkaProperties.put("log.dir", tmpFolder.toString());
		kafkaProperties.put("zookeeper.connect", zookeeperConnectionString);
		kafkaProperties.put("message.max.bytes", String.valueOf(50 * 1024 * 1024));
		kafkaProperties.put("replica.fetch.max.bytes", String.valueOf(50 * 1024 * 1024));

		// for CI stability, increase zookeeper session timeout
		kafkaProperties.put("zookeeper.session.timeout.ms", zkTimeout);
		kafkaProperties.put("zookeeper.connection.timeout.ms", zkTimeout);
		if (additionalServerProperties != null) {
			kafkaProperties.putAll(additionalServerProperties);
		}

		final int numTries = 5;

		for (int i = 1; i <= numTries; i++) {
			int kafkaPort = NetUtils.getAvailablePort();
			kafkaProperties.put("port", Integer.toString(kafkaPort));

			// to support a secure kafka cluster
			if (secureMode) {
				LOG.info("Adding Kafka secure configurations");
				kafkaProperties.put("listeners", "SASL_PLAINTEXT://" + KAFKA_HOST + ":" + kafkaPort);
				kafkaProperties.put("advertised.listeners", "SASL_PLAINTEXT://" + KAFKA_HOST + ":" + kafkaPort);
				kafkaProperties.putAll(getSecureProperties());
			}
			KafkaConfig kafkaConfig = new KafkaConfig(kafkaProperties);

			try {
				scala.Option<String> stringNone = scala.Option.apply(null);
				KafkaServer server = new KafkaServer(kafkaConfig, SystemTime$.MODULE$, stringNone);
				server.startup();
				return server;
			} catch (KafkaException e) {
				if (e.getCause() instanceof BindException) {
					// port conflict, retry...
					LOG.info("Port conflict when starting Kafka Broker. Retrying...");
				} else {
					throw e;
				}
			}
		}

		throw new Exception("Could not start Kafka after " + numTries + " retries due to port conflicts.");
	}

	public Properties getSecureProperties() {
		Properties prop = new Properties();
		if (secureMode) {
			prop.put("security.inter.broker.protocol", "SASL_PLAINTEXT");
			prop.put("security.protocol", "SASL_PLAINTEXT");
			prop.put("sasl.kerberos.service.name", "kafka");

			// add special timeout for Travis
			prop.setProperty("zookeeper.session.timeout.ms", zkTimeout);
			prop.setProperty("zookeeper.connection.timeout.ms", zkTimeout);
			prop.setProperty("metadata.fetch.timeout.ms", "120000");
		}
		return prop;
	}

	private class KafkaOffsetHandlerImpl implements KafkaOffsetHandler {

		private final KafkaConsumer<byte[], byte[]> offsetClient;

		public KafkaOffsetHandlerImpl() {
			Properties props = new Properties();
			props.putAll(standardProps);
			props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
			props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

			offsetClient = new KafkaConsumer<>(props);
		}

		@Override
		public Long getCommittedOffset(String topicName, int partition) {
			OffsetAndMetadata committed = offsetClient.committed(new TopicPartition(topicName, partition));
			return (committed != null) ? committed.offset() : null;
		}

		@Override
		public void setCommittedOffset(String topicName, int partition, long offset) {
			Map<TopicPartition, OffsetAndMetadata> partitionAndOffset = new HashMap<>();
			partitionAndOffset.put(new TopicPartition(topicName, partition), new OffsetAndMetadata(offset));
			offsetClient.commitSync(partitionAndOffset);
		}

		@Override
		public void close() {
			offsetClient.close();
		}
	}
}