Java tutorial
//@@@ START COPYRIGHT @@@
//
// Copyright (c) 2016, Esgyn Corporation, http://www.esgyn.com.
//
// Licensed under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
//@@@ END COPYRIGHT @@@

//package com.esgyn.kafka;

import java.sql.BatchUpdateException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.Properties;
//import java.util.Set;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.trafodion.jdbc.t2.*;

import kafka.consumer.ConsumerTimeoutException;

/*
 * This represents a typical Kafka consumer that uses EsgynDB for data storage.
 * It is a single-threaded server that can be replicated to scale out - each copy
 * handling a partition of a topic.
 *
 * Execution of each server in the group handled by...
 * TBD
 *   a. pdsh script per node
 *   b. zookeeper?
 */

// flow:
//   get execution parameters
//   make topic connection - which partition is assigned?
//   check status table in DB for where this partition left off
//   set to start from this offset
//   poll...
//   if no restart info in status table, start at end, poll...
//   while polling returns records
//     upsert into esgyndb table
//     kafka commit every <param value> records
//     update offset value into status table for this topic/partition

/*
 * Added option to insert using UDF.
 *
 * The UDF runs until "no more records"... in the Kafka case, we'd want the UDF to exit after
 * 'X' number of records or some T/O value of msg reception, so that we can re-drive the call.
 * In this example, we have a pile of messages already in the topic and not an active pub-sub
 * situation. It has been set up to demonstrate the performance difference of INSERTing via
 * "normal" row inserts versus inserts directly from UDF output.
 *
 * In practice, the UDF would return context info that allows a subsequent call to the UDF to
 * pick up where the previous one left off. The consumer would need to safe-store / transaction-
 * protect this info for restart / recovery purposes.
 */
@SuppressWarnings("deprecation")
public class ConsumerServer {

    private final long DEFAULT_STREAM_TO_MS = 20000;
    private final long DEFAULT_ZOOK_TO_MS = 10000;
    private final long DEFAULT_COMMIT_COUNT = 500;

    private PreparedStatement pStmt;
    private Connection conn;

    // execution settings
    String zookeeper;
    String broker;
    String topic;
    String groupID;
    String columnCodes;
    long streamTO;
    long zkTO;
    long commitCount;
    char delimiter;
    int udfMode;
    int insMode;
    boolean autoCommit;
    boolean t2Connect;

    KafkaConsumer<String, String> kafka;

    ConsumerServer() {
        zookeeper = "";
        broker = "";
        topic = "";
        groupID = "";
        columnCodes = "";
        streamTO = 0;
        zkTO = 0;
        commitCount = 0;
        delimiter = ',';
        udfMode = -1;
        autoCommit = false;
        insMode = -1;
        t2Connect = false;
    }

    public void init(String[] args) throws ParseException {
        /*
         * Get command line args
         *
         * Cmd line params:
         *   -z --zook    zookeeper connection (node:port[/kafka?])
         *   -b --broker  broker location
         *   -t --topic   topic
         *   -g --group   groupID
         *      --sto     stream T/O (data polling)
         *      --zkto    zk T/O
         *      --cols    output column descriptions - Hans' code
         *   -d --delim   field delimiter - Hans' code
         *   -c --commit  commit interval (num recs)
         *   -u --udf     use insert via udf
         *      --autocom autocommit
         *   -i --insert  iterative insert/upsert
         *      --t2      use T2 JDBC, defaults to T4
         */
        Option zkOption = Option.builder("z").longOpt("zook").required(false).hasArg()
                .desc("zookeeper connection list, ex: <node>:port[/kafka],...").build();
        Option brokerOption = Option.builder("b").longOpt("broker").required(false).hasArg()
                .desc("bootstrap.servers setting, ex: <node>:9092").build();
        Option topicOption = Option.builder("t").longOpt("topic").required(true).hasArg()
                .desc("REQUIRED. topic of subscription").build();
        Option groupOption = Option.builder("g").longOpt("group").required(true).hasArg()
                .desc("REQUIRED. groupID for this consumer").build();
        Option stoOption = Option.builder().longOpt("sto").required(false).hasArg()
                .desc("kafka poll time-out limit, default 20000ms").build();
        Option ztoOption = Option.builder().longOpt("zkto").required(false).hasArg()
                .desc("zookeeper time-out limit, default 10000ms").build();
        Option colsOption = Option.builder().longOpt("cols").required(false).hasArg()
                .desc("encode of output column defs, one for each delimited field in Kafka message").build();
        Option delimOption = Option.builder("d").longOpt("delim").required(false).hasArg()
                .desc("field delimiter, default: ','(comma)").build();
        Option commitOption = Option.builder("c").longOpt("commit").required(false).hasArg()
                .desc("num messages per Kafka synch, default: 500").build();
        Option t2Option = Option.builder().longOpt("t2").required(false)
                .desc("use T2 JDBC, default: use T4").build();
        Option udfOption = Option.builder("u").longOpt("udf").required(false).hasArg()
                .desc("use UDF for insert: 1 = upsert using load; 2 = insert; 3 = upsert").build();
        Option autocOption = Option.builder().longOpt("autocom").required(false)
                .desc("autocommit, default false").build();
        Option insOption = Option.builder("i").longOpt("insert").required(false).hasArg()
                .desc("insert mode: 1 = insert; 2 = upsert").build();

        Options exeOptions = new Options();
        exeOptions.addOption(zkOption);
        exeOptions.addOption(brokerOption);
        exeOptions.addOption(topicOption);
        exeOptions.addOption(groupOption);
        exeOptions.addOption(stoOption);
        exeOptions.addOption(ztoOption);
        exeOptions.addOption(colsOption);
        exeOptions.addOption(delimOption);
        exeOptions.addOption(commitOption);
        exeOptions.addOption(udfOption);
        exeOptions.addOption(autocOption);
        exeOptions.addOption(insOption);
        exeOptions.addOption(t2Option);

        // With required options, we can't have a HELP option to display help,
        // as it would only indicate that "required options are missing"
        if (args.length == 0) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("Consumer Server", exeOptions);
            System.exit(0);
        }

        CommandLineParser parser = new DefaultParser();
        CommandLine cmdLine = parser.parse(exeOptions, args);

        // for the required options, move the value
        topic = cmdLine.getOptionValue("topic");
        groupID = cmdLine.getOptionValue("group");

        zookeeper = cmdLine.hasOption("zook") ? cmdLine.getOptionValue("zook") : null;
        broker = cmdLine.hasOption("broker") ? cmdLine.getOptionValue("broker") : null;
        columnCodes = cmdLine.hasOption("cols") ? cmdLine.getOptionValue("cols") : "";
        streamTO = cmdLine.hasOption("sto") ? Long.parseLong(cmdLine.getOptionValue("sto")) : DEFAULT_STREAM_TO_MS;
        zkTO = cmdLine.hasOption("zkto") ? Long.parseLong(cmdLine.getOptionValue("zkto")) : DEFAULT_ZOOK_TO_MS;
        commitCount = cmdLine.hasOption("commit") ? Long.parseLong(cmdLine.getOptionValue("commit")) : DEFAULT_COMMIT_COUNT;
        delimiter = cmdLine.hasOption("delim") ? cmdLine.getOptionValue("delim").charAt(0) : ',';
        udfMode = cmdLine.hasOption("udf") ? Integer.parseInt(cmdLine.getOptionValue("udf")) : 0;
        autoCommit = cmdLine.hasOption("autocom");
        insMode = cmdLine.hasOption("insert") ? Integer.parseInt(cmdLine.getOptionValue("insert")) : 0;
        t2Connect = cmdLine.hasOption("t2");

        // one of zook | broker must be given
        if (zookeeper == null && broker == null) {
            System.out.println("*** Error: Must provide zookeeper or broker string");
            System.exit(0);
        }
    }

    public void getConsumer(String zkOrBroker, boolean useZk) {
        /*
         * Instantiate a KafkaConsumer with the cmd line options.
         *
         * Older versions of Kafka might only recognize the ZK interface - allow for that.
         */
        Properties props = new Properties();
        // props.put("auto.offset.reset", "smallest");
        if (useZk)
            props.put("zookeeper.connect", zkOrBroker);
        else
            props.put("bootstrap.servers", zkOrBroker);
        props.put("group.id", groupID);
        props.put("enable.auto.commit", "false");
        props.put("session.timeout.ms", String.valueOf(zkTO)); // zookeeper wait t/o
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        kafka = new KafkaConsumer<>(props);
    }

    public void processMessages() {
        // for this exercise start from offset 0
        // produce batches of n size for jdbc and insert

        // for this table: char(10), char(20), long, ...
        String sqlInsert = "INSERT INTO kblog.BLOGDATA VALUES (?,?,?,?,?)";
        String sqlUpsert = "UPSERT INTO kblog.BLOGDATA VALUES (?,?,?,?,?)";

        final String UDFCALL = " select * from udf(kblog.kaf3('nap007:9092',"
                + " 'gid',"
                + " 'blogit',"
                + " 0,"
                + " 'null',"
                + " 'C10C20IC55C55',"
                + " '|',"
                + " -1,"
                + " 1000 ))";
        final String SQLUPSERT = "upsert using load into kblog.blogdata ";
        final String SQLINSERT = "insert into kblog.blogdata ";
        final String SQLUPSERT2 = "upsert into kblog.blogdata ";

        try {
            if (t2Connect) {
                // T2
                Class.forName("org.apache.trafodion.jdbc.t2.T2Driver");
                conn = DriverManager.getConnection("jdbc:t2jdbc:");
            } else {
                // T4
                Class.forName("org.trafodion.jdbc.t4.T4Driver");
                conn = DriverManager.getConnection("jdbc:t4jdbc://nap007:23400/:", "trafodion", "passw");
            }
            conn.setAutoCommit(autoCommit);
        } catch (SQLException sx) {
            System.out.println("SQL error: " + sx.getMessage());
            System.exit(1);
        } catch (ClassNotFoundException cx) {
            System.out.println("Driver class not found: " + cx.getMessage());
            System.exit(2);
        }

        // message processing loop
        String[] msgFields;
        long numRows = 0;
        long totalRows = 0;
        int[] batchResult;

        if (udfMode == 0 && insMode == 0) {
            // missing cmd line setting
            System.out.println("*** Neither UDF nor INSERT mode specified - aborting ***");
            System.exit(2);
        }

        try {
            if (udfMode > 0) {
                long diff = 0;
                long startTime = System.currentTimeMillis();
                switch (udfMode) {
                case 1: // upsert using load
                    pStmt = conn.prepareStatement(SQLUPSERT + UDFCALL);
                    totalRows = pStmt.executeUpdate();
                    diff = (System.currentTimeMillis() - startTime);
                    System.out.println("Upsert loaded row count: " + totalRows + " in " + diff + " ms");
                    break;

                case 2: // insert
                    pStmt = conn.prepareStatement(SQLINSERT + UDFCALL);
                    totalRows = pStmt.executeUpdate();
                    if (!autoCommit) {
                        conn.commit();
                        diff = (System.currentTimeMillis() - startTime);
                        System.out.println("Insert row count (autocommit off): " + totalRows + " in " + diff + " ms");
                    } else {
                        diff = (System.currentTimeMillis() - startTime);
                        System.out.println("Insert row count (autocommit on): " + totalRows + " in " + diff + " ms");
                    }
                    break;

                case 3: // upsert
                    pStmt = conn.prepareStatement(SQLUPSERT2 + UDFCALL);
                    totalRows = pStmt.executeUpdate();
                    if (!autoCommit) {
                        conn.commit();
                        diff = (System.currentTimeMillis() - startTime);
                        System.out.println("Upsert row count (autocommit off): " + totalRows + " in " + diff + " ms");
                    } else {
                        diff = (System.currentTimeMillis() - startTime);
                        System.out.println("Upsert row count (autocommit on): " + totalRows + " in " + diff + " ms");
                    }
                    break;

                default: // illegal value
                    System.out.println("*** Only udf values 1,2,3 allowed; found: " + udfMode);
                    System.exit(2);
                } // switch
            } // udfMode
            else { // iterative insert/upsert
                switch (insMode) {
                case 1: // insert
                    pStmt = conn.prepareStatement(sqlInsert);
                    break;
                case 2: // upsert
                    pStmt = conn.prepareStatement(sqlUpsert);
                    break;
                default: // illegal
                    System.out.println("*** Only insert values 1,2 allowed; found: " + insMode);
                    System.exit(2);
                } // switch

                kafka.subscribe(Arrays.asList(topic));
                // priming poll
                kafka.poll(100);
                // always start from beginning
                kafka.seekToBeginning(Arrays.asList(new TopicPartition(topic, 0)));

                // enable autocommit and singleton inserts for comparative timings
                long startTime = System.currentTimeMillis();
                while (true) {
                    // note that we don't commitSync to kafka - though we should
                    ConsumerRecords<String, String> records = kafka.poll(streamTO);
                    if (records.isEmpty())
                        break; // timed out

                    for (ConsumerRecord<String, String> msg : records) {
                        msgFields = msg.value().split("\\" + Character.toString(delimiter));

                        // position info for this message
                        long offset = msg.offset();
                        int partition = msg.partition();
                        String topic = msg.topic();

                        pStmt.setString(1, msgFields[0]);
                        pStmt.setString(2, msgFields[1]);
                        pStmt.setLong(3, Long.parseLong(msgFields[2]));
                        pStmt.setString(4, msgFields[3]);
                        pStmt.setString(5, msgFields[4]);
                        numRows++;
                        totalRows++;

                        if (autoCommit) {
                            // single ins/up-sert
                            pStmt.executeUpdate();
                        } else {
                            pStmt.addBatch();
                            if ((numRows % commitCount) == 0) {
                                numRows = 0;
                                batchResult = pStmt.executeBatch();
                                conn.commit();
                            }
                        }
                    } // for each msg
                } // while true

                // get here when poll returns no records
                if (numRows > 0 && !autoCommit) {
                    // remaining rows
                    batchResult = pStmt.executeBatch();
                    conn.commit();
                }

                long diff = (System.currentTimeMillis() - startTime);
                if (autoCommit)
                    System.out.println("Total rows: " + totalRows + " in " + diff + " ms");
                else
                    System.out.println("Total rows: " + totalRows + " in " + diff + " ms; batch size = " + commitCount);

                kafka.close();
            } // else
        } // try
        catch (ConsumerTimeoutException to) {
            System.out.println("consumer time out; " + to.getMessage());
            System.exit(1);
        } catch (BatchUpdateException bx) {
            int[] insertCounts = bx.getUpdateCounts();
            int count = 1;
            for (int i : insertCounts) {
                if (i == Statement.EXECUTE_FAILED)
                    System.out.println("Error on request #" + count + ": Execute failed");
                else
                    count++;
            }
            System.out.println(bx.getMessage());
            System.exit(1);
        } catch (SQLException sx) {
            System.out.println("SQL error: " + sx.getMessage());
            System.exit(1);
        }
    }

    public static void main(String[] args) {
        ConsumerServer me = new ConsumerServer();
        try {
            me.init(args);
        } catch (ParseException p) {
            System.out.println("Cmd line error: " + p.getMessage());
            System.exit(0);
        }

        // debug
        System.out.println("zookeeper   = " + me.zookeeper);
        System.out.println("broker      = " + me.broker);
        System.out.println("topic       = " + me.topic);
        System.out.println("groupID     = " + me.groupID);
        System.out.println("columnCodes = " + me.columnCodes);
        System.out.println("streamTO    = " + me.streamTO);
        System.out.println("zkTO        = " + me.zkTO);
        System.out.println("commitCount = " + me.commitCount);
        System.out.println("delimiter   = " + me.delimiter);
        System.out.println("udfMode     = " + me.udfMode);
        System.out.println("insMode     = " + me.insMode);
        System.out.println("autocommit  = " + me.autoCommit);
        System.out.println("t2Driver    = " + me.t2Connect);

        // connect to Kafka with either the broker or zookeeper setting
        if (me.zookeeper == null)
            me.getConsumer(me.broker, false);
        else
            me.getConsumer(me.zookeeper, true);

        me.processMessages();
    }
}
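The consumer above assumes messages are already sitting in the topic. For testing, a small producer along the following lines can populate it. This is a minimal sketch and not part of the original example: the broker address (nap007:9092) and topic name (blogit) are taken from the UDF call string above, and the record layout (five comma-delimited fields whose third field is numeric) simply mirrors what the consumer's prepared statement binds; adjust all of these to your environment.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class BlogDataProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "nap007:9092");   // assumed broker; adjust to your cluster
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        try {
            for (int i = 0; i < 1000; i++) {
                // five comma-delimited fields; the third must parse as a long,
                // matching the parameters bound by the consumer's INSERT/UPSERT statement
                String value = "key" + i + ",name" + i + "," + i + ",field4-" + i + ",field5-" + i;
                producer.send(new ProducerRecord<String, String>("blogit", value));
            }
        } finally {
            producer.flush();
            producer.close();
        }
    }
}

With the topic populated, the consumer could then be run with arguments such as -b nap007:9092 -t blogit -g gid -i 1 -c 500 (values shown only as an assumed example) to drive the iterative insert path, or with -u 1/2/3 to compare the UDF-based paths.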