List of usage examples for java.util Properties putAll
@Override public synchronized void putAll(Map<?, ?> t)
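Properties.putAll copies every mapping from the argument map into the receiving Properties object, overwriting entries whose keys already exist. The examples below rely on this to layer several configuration sources (for instance standard and security-related Kafka settings) into one Properties instance. A minimal, self-contained sketch of that behaviour follows; the class name and property keys are illustrative only, not taken from the examples.

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class PutAllDemo {
    public static void main(String[] args) {
        Properties base = new Properties();
        base.setProperty("bootstrap.servers", "localhost:9092");
        base.setProperty("auto.offset.reset", "earliest");

        Map<String, String> overrides = new HashMap<>();
        overrides.put("auto.offset.reset", "latest"); // overwrites the existing entry
        overrides.put("group.id", "demo-group");      // adds a new entry

        // putAll copies every mapping from the argument, replacing keys that already exist
        base.putAll(overrides);

        System.out.println(base.getProperty("auto.offset.reset")); // latest
        System.out.println(base.getProperty("group.id"));          // demo-group
    }
}

Note that putAll accepts arbitrary objects, so entries with non-String keys or values become invisible to getProperty; the examples below only merge String-valued properties.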
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Tests that the source can be properly canceled when reading empty partitions.
 */
public void runCancelingOnEmptyInputTest() throws Exception {
    final String topic = "cancelingOnEmptyInputTopic";

    final int parallelism = 3;
    createTestTopic(topic, parallelism, 1);

    final AtomicReference<Throwable> error = new AtomicReference<>();

    final Runnable jobRunner = new Runnable() {
        @Override
        public void run() {
            try {
                final StreamExecutionEnvironment env = StreamExecutionEnvironment
                        .createRemoteEnvironment("localhost", flinkPort);
                env.setParallelism(parallelism);
                env.enableCheckpointing(100);
                env.getConfig().disableSysoutLogging();

                Properties props = new Properties();
                props.putAll(standardProps);
                props.putAll(secureProps);
                FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic,
                        new SimpleStringSchema(), props);

                env.addSource(source).addSink(new DiscardingSink<String>());

                env.execute("CancelingOnEmptyInputTest");
            } catch (Throwable t) {
                LOG.error("Job Runner failed with exception", t);
                error.set(t);
            }
        }
    };

    Thread runnerThread = new Thread(jobRunner, "program runner thread");
    runnerThread.start();

    // wait a bit before canceling
    Thread.sleep(2000);

    Throwable failureCause = error.get();
    if (failureCause != null) {
        failureCause.printStackTrace();
        Assert.fail("Test failed prematurely with: " + failureCause.getMessage());
    }

    // cancel
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    // wait for the program to be done and validate that we failed with the right exception
    runnerThread.join();

    failureCause = error.get();
    assertNotNull("program did not fail properly due to canceling", failureCause);
    assertTrue(failureCause.getMessage().contains("Job was cancelled"));

    deleteTestTopic(topic);
}
From source file:org.apache.falcon.oozie.OozieCoordinatorBuilder.java
protected Properties createCoordDefaultConfiguration(Cluster cluster, String coordName) throws FalconException {
    Properties props = new Properties();
    props.put(ARG.entityName.getPropName(), entity.getName());
    props.put(ARG.nominalTime.getPropName(), NOMINAL_TIME_EL);
    props.put(ARG.timeStamp.getPropName(), ACTUAL_TIME_EL);
    props.put("userBrokerUrl", ClusterHelper.getMessageBrokerUrl(cluster));
    props.put("userBrokerImplClass", ClusterHelper.getMessageBrokerImplClass(cluster));
    String falconBrokerUrl = StartupProperties.get().getProperty(ARG.brokerUrl.getPropName(),
            "tcp://localhost:61616?daemon=true");
    props.put(ARG.brokerUrl.getPropName(), falconBrokerUrl);
    String falconBrokerImplClass = StartupProperties.get().getProperty(ARG.brokerImplClass.getPropName(),
            ClusterHelper.DEFAULT_BROKER_IMPL_CLASS);
    props.put(ARG.brokerImplClass.getPropName(), falconBrokerImplClass);
    String jmsMessageTTL = StartupProperties.get().getProperty("broker.ttlInMins",
            DEFAULT_BROKER_MSG_TTL.toString());
    props.put(ARG.brokerTTL.getPropName(), jmsMessageTTL);
    props.put(ARG.entityType.getPropName(), entity.getEntityType().name());
    props.put("logDir", getStoragePath(new Path(EntityUtil.getBaseStagingPath(cluster, entity), "logs")));
    props.put(OozieClient.EXTERNAL_ID, new ExternalId(entity.getName(),
            EntityUtil.getWorkflowNameTag(coordName, entity), "${coord:nominalTime()}").getId());
    props.put("workflowEngineUrl", ClusterHelper.getOozieUrl(cluster));

    if (EntityUtil.getLateProcess(entity) == null || EntityUtil.getLateProcess(entity).getLateInputs() == null
            || EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) {
        props.put("shouldRecord", "false");
    } else {
        props.put("shouldRecord", "true");
    }

    props.put("entityName", entity.getName());
    props.put("entityType", entity.getEntityType().name().toLowerCase());
    props.put(ARG.cluster.getPropName(), cluster.getName());
    props.put(MR_QUEUE_NAME, "default");
    props.put(MR_JOB_PRIORITY, "NORMAL");

    // props in entity override the set props.
    props.putAll(getEntityProperties(entity));
    return props;
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * This test ensures that when the consumers retrieve some start offset from kafka (earliest, latest), that this offset
 * is committed to Kafka, even if some partitions are not read.
 *
 * Test:
 * - Create 3 partitions
 * - write 50 messages into each.
 * - Start three consumers with auto.offset.reset='latest' and wait until they committed into Kafka.
 * - Check if the offsets in Kafka are set to 50 for the three partitions
 *
 * See FLINK-3440 as well
 */
public void runAutoOffsetRetrievalAndCommitToKafka() throws Exception {
    // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition should be 50)
    final int parallelism = 3;
    final int recordsInEachPartition = 50;

    final String topicName = writeSequence("testAutoOffsetRetrievalAndCommitToKafkaTopic",
            recordsInEachPartition, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);
    env.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "latest"); // set to reset to latest, so that partitions are initially not read

    DataStream<String> stream = env
            .addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());

    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner = new Thread("runner") {
        @Override
        public void run() {
            try {
                env.execute();
            } catch (Throwable t) {
                if (!(t.getCause() instanceof JobCancellationException)) {
                    errorRef.set(t);
                }
            }
        }
    };
    runner.start();

    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();

    final Long l50 = 50L; // the final committed offset in Kafka should be 50
    final long deadline = 30_000_000_000L + System.nanoTime();
    do {
        Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);

        if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
            break;
        }

        Thread.sleep(100);
    } while (System.nanoTime() < deadline);

    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }

    // final check to see if offsets are correctly in Kafka
    Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
    Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
    Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    Assert.assertEquals(Long.valueOf(50L), o1);
    Assert.assertEquals(Long.valueOf(50L), o2);
    Assert.assertEquals(Long.valueOf(50L), o3);

    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
public void runKeyValueTest() throws Exception {
    final String topic = "keyvaluetest";
    createTestTopic(topic, 1, 1);
    final int ELEMENT_COUNT = 5000;

    // ----------- Write some data into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
        @Override
        public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
            Random rnd = new Random(1337);
            for (long i = 0; i < ELEMENT_COUNT; i++) {
                PojoValue pojo = new PojoValue();
                pojo.when = new Date(rnd.nextLong());
                pojo.lon = rnd.nextLong();
                pojo.lat = i;
                // make every second key null to ensure proper "null" serialization
                Long key = (i % 2 == 0) ? null : i;
                ctx.collect(new Tuple2<>(key, pojo));
            }
        }

        @Override
        public void cancel() {
        }
    });

    KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(
            Long.class, PojoValue.class, env.getConfig());
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "3");
    kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
    env.execute("Write KV to Kafka");

    // ----------- Read the data again -------------------

    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    KeyedDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(
            Long.class, PojoValue.class, env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Long, PojoValue>> fromKafka = env
            .addSource(kafkaServer.getConsumer(topic, readSchema, props));
    fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
        long counter = 0;

        @Override
        public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
            // the elements should be in order.
            Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
            if (value.f1.lat % 2 == 0) {
                assertNull("key was not null", value.f0);
            } else {
                Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
            }
            counter++;
            if (counter == ELEMENT_COUNT) {
                // we got the right number of elements
                throw new SuccessException();
            }
        }
    });

    tryExecute(env, "Read KV from Kafka");

    deleteTestTopic(topic);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Test producing and consuming into multiple topics
 *
 * @throws java.lang.Exception
 */
public void runProduceConsumeMultipleTopics() throws java.lang.Exception {
    final int NUM_TOPICS = 5;
    final int NUM_ELEMENTS = 20;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();

    // create topics with content
    final List<String> topics = new ArrayList<>();
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        topics.add(topic);
        // create topic
        createTestTopic(topic, i + 1 /*partitions*/, 1);
    }

    // run first job, producing into all topics
    DataStream<Tuple3<Integer, Integer, String>> stream = env
            .addSource(new RichParallelSourceFunction<Tuple3<Integer, Integer, String>>() {
                @Override
                public void run(SourceContext<Tuple3<Integer, Integer, String>> ctx) throws Exception {
                    int partition = getRuntimeContext().getIndexOfThisSubtask();

                    for (int topicId = 0; topicId < NUM_TOPICS; topicId++) {
                        for (int i = 0; i < NUM_ELEMENTS; i++) {
                            ctx.collect(new Tuple3<>(partition, i, "topic-" + topicId));
                        }
                    }
                }

                @Override
                public void cancel() {
                }
            });

    Tuple2WithTopicSchema schema = new Tuple2WithTopicSchema(env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    kafkaServer.produceIntoKafka(stream, "dummy", schema, props, null);

    env.execute("Write to topics");

    // run second job consuming from multiple topics
    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();

    stream = env.addSource(kafkaServer.getConsumer(topics, schema, props));

    stream.flatMap(new FlatMapFunction<Tuple3<Integer, Integer, String>, Integer>() {
        Map<String, Integer> countPerTopic = new HashMap<>(NUM_TOPICS);

        @Override
        public void flatMap(Tuple3<Integer, Integer, String> value, Collector<Integer> out) throws Exception {
            Integer count = countPerTopic.get(value.f2);
            if (count == null) {
                count = 1;
            } else {
                count++;
            }
            countPerTopic.put(value.f2, count);

            // check map:
            for (Map.Entry<String, Integer> el : countPerTopic.entrySet()) {
                if (el.getValue() < NUM_ELEMENTS) {
                    break; // not enough yet
                }
                if (el.getValue() > NUM_ELEMENTS) {
                    throw new RuntimeException("There is a failure in the test. I've read "
                            + el.getValue() + " from topic " + el.getKey());
                }
            }
            // we've seen messages from all topics
            throw new SuccessException();
        }
    }).setParallelism(1);

    tryExecute(env, "Count elements from the topics");

    // delete all topics again
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        deleteTestTopic(topic);
    }
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Tests that the source can be properly canceled when reading full partitions.
 */
public void runCancelingOnFullInputTest() throws Exception {
    final String topic = "cancelingOnFullTopic";

    final int parallelism = 3;
    createTestTopic(topic, parallelism, 1);

    // launch a producer thread
    DataGenerators.InfiniteStringsGenerator generator = new DataGenerators.InfiniteStringsGenerator(kafkaServer,
            topic);
    generator.start();

    // launch a consumer asynchronously
    final AtomicReference<Throwable> jobError = new AtomicReference<>();

    final Runnable jobRunner = new Runnable() {
        @Override
        public void run() {
            try {
                final StreamExecutionEnvironment env = StreamExecutionEnvironment
                        .createRemoteEnvironment("localhost", flinkPort);
                env.setParallelism(parallelism);
                env.enableCheckpointing(100);
                env.getConfig().disableSysoutLogging();

                Properties props = new Properties();
                props.putAll(standardProps);
                props.putAll(secureProps);
                FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic,
                        new SimpleStringSchema(), props);

                env.addSource(source).addSink(new DiscardingSink<String>());

                env.execute("Runner for CancelingOnFullInputTest");
            } catch (Throwable t) {
                jobError.set(t);
            }
        }
    };

    Thread runnerThread = new Thread(jobRunner, "program runner thread");
    runnerThread.start();

    // wait a bit before canceling
    Thread.sleep(2000);

    Throwable failureCause = jobError.get();
    if (failureCause != null) {
        failureCause.printStackTrace();
        Assert.fail("Test failed prematurely with: " + failureCause.getMessage());
    }

    // cancel
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout),
            "Runner for CancelingOnFullInputTest");

    // wait for the program to be done and validate that we failed with the right exception
    runnerThread.join();

    failureCause = jobError.get();
    assertNotNull("program did not fail properly due to canceling", failureCause);
    assertTrue(failureCause.getMessage().contains("Job was cancelled"));

    if (generator.isAlive()) {
        generator.shutdown();
        generator.join();
    } else {
        Throwable t = generator.getError();
        if (t != null) {
            t.printStackTrace();
            fail("Generator failed: " + t.getMessage());
        } else {
            fail("Generator failed with no exception");
        }
    }

    deleteTestTopic(topic);
}
From source file:net.ssehub.easy.instantiation.core.model.buildlangModel.BuildlangExecution.java
/**
 * Loads properties from <code>file</code> into <code>prop</code>, possibly overriding existing
 * properties.
 *
 * @param file the file name
 * @param prop the loaded properties (to be modified as a side effect)
 * @param os if not <b>null</b>, to be inserted after the last "." with a following ".". If the file
 *     does not exist, no exception is thrown.
 * @throws VilException in case of loading problems
 */
private void loadProperties(File file, Properties prop, String os) throws VilException {
    boolean loadFile = true;
    if (null != os) {
        String f = file.toString();
        int pos = f.lastIndexOf('.');
        if (pos > 0 && pos < f.length()) {
            f = f.substring(0, pos + 1) + os + "." + f.substring(pos + 1);
            file = new File(f);
            loadFile = file.exists();
        } else {
            loadFile = false;
        }
    }
    if (loadFile) {
        try {
            FileInputStream fis = new FileInputStream(file);
            Properties p = new Properties();
            p.load(fis);
            prop.putAll(p);
            fis.close();
            for (String key : prop.stringPropertyNames()) {
                String value = prop.getProperty(key);
                // Replace value
                try {
                    value = StringReplacer.substitute(value, new Resolver(environment), getExpressionParser(),
                            this, null);
                } catch (VilException e) {
                    EASyLoggerFactory.INSTANCE.getLogger(getClass(), Bundle.ID).exception(e);
                }
                prop.setProperty(key, value);
            }
        } catch (IOException e) {
            throw new VilException(e.getMessage(), e, VilException.ID_IO);
        }
    }
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Test Flink's Kafka integration also with very big records (30MB)
 * see http://stackoverflow.com/questions/21020347/kafka-sending-a-15mb-message
 */
public void runBigRecordTestTopology() throws Exception {

    final String topic = "bigRecordTestTopic";
    final int parallelism = 1; // otherwise, the kafka mini clusters may run out of heap space

    createTestTopic(topic, parallelism, 1);

    final TypeInformation<Tuple2<Long, byte[]>> longBytesInfo = TypeInfoParser.parse("Tuple2<Long, byte[]>");

    final TypeInformationSerializationSchema<Tuple2<Long, byte[]>> serSchema = new TypeInformationSerializationSchema<>(
            longBytesInfo, new ExecutionConfig());

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.enableCheckpointing(100);
    env.setParallelism(parallelism);

    // add consuming topology:
    Properties consumerProps = new Properties();
    consumerProps.putAll(standardProps);
    consumerProps.setProperty("fetch.message.max.bytes", Integer.toString(1024 * 1024 * 14));
    consumerProps.setProperty("max.partition.fetch.bytes", Integer.toString(1024 * 1024 * 14)); // for the new fetcher
    consumerProps.setProperty("queued.max.message.chunks", "1");
    consumerProps.putAll(secureProps);

    FlinkKafkaConsumerBase<Tuple2<Long, byte[]>> source = kafkaServer.getConsumer(topic, serSchema,
            consumerProps);
    DataStreamSource<Tuple2<Long, byte[]>> consuming = env.addSource(source);

    consuming.addSink(new SinkFunction<Tuple2<Long, byte[]>>() {

        private int elCnt = 0;

        @Override
        public void invoke(Tuple2<Long, byte[]> value) throws Exception {
            elCnt++;
            if (value.f0 == -1) {
                // we should have seen 11 elements now.
                if (elCnt == 11) {
                    throw new SuccessException();
                } else {
                    throw new RuntimeException("There have been " + elCnt + " elements");
                }
            }
            if (elCnt > 10) {
                throw new RuntimeException("More than 10 elements seen: " + elCnt);
            }
        }
    });

    // add producing topology
    Properties producerProps = new Properties();
    producerProps.setProperty("max.request.size", Integer.toString(1024 * 1024 * 15));
    producerProps.setProperty("retries", "3");
    producerProps.putAll(secureProps);
    producerProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerConnectionStrings);

    DataStream<Tuple2<Long, byte[]>> stream = env.addSource(new RichSourceFunction<Tuple2<Long, byte[]>>() {

        private boolean running;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            running = true;
        }

        @Override
        public void run(SourceContext<Tuple2<Long, byte[]>> ctx) throws Exception {
            Random rnd = new Random();
            long cnt = 0;
            int sevenMb = 1024 * 1024 * 7;

            while (running) {
                byte[] wl = new byte[sevenMb + rnd.nextInt(sevenMb)];
                ctx.collect(new Tuple2<>(cnt++, wl));

                Thread.sleep(100);

                if (cnt == 10) {
                    // signal end
                    ctx.collect(new Tuple2<>(-1L, new byte[] { 1 }));
                    break;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), producerProps,
            null);

    tryExecute(env, "big topology test");
    deleteTestTopic(topic);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Test metrics reporting for consumer
 *
 * @throws Exception
 */
public void runMetricsTest() throws Throwable {

    // create a stream with 5 topics
    final String topic = "metricsStream";
    createTestTopic(topic, 5, 1);

    final Tuple1<Throwable> error = new Tuple1<>(null);
    Runnable job = new Runnable() {
        @Override
        public void run() {
            try {
                // start job writing & reading data.
                final StreamExecutionEnvironment env1 = StreamExecutionEnvironment
                        .createRemoteEnvironment("localhost", flinkPort);
                env1.setParallelism(1);
                env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
                env1.getConfig().disableSysoutLogging();
                env1.disableOperatorChaining(); // let the source read everything into the network buffers

                Properties props = new Properties();
                props.putAll(standardProps);
                props.putAll(secureProps);

                TypeInformationSerializationSchema<Tuple2<Integer, Integer>> schema = new TypeInformationSerializationSchema<>(
                        TypeInfoParser.<Tuple2<Integer, Integer>>parse("Tuple2<Integer, Integer>"),
                        env1.getConfig());
                DataStream<Tuple2<Integer, Integer>> fromKafka = env1
                        .addSource(kafkaServer.getConsumer(topic, schema, standardProps));
                fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {
                    @Override
                    public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
                        // no op
                    }
                });

                DataStream<Tuple2<Integer, Integer>> fromGen = env1
                        .addSource(new RichSourceFunction<Tuple2<Integer, Integer>>() {
                            boolean running = true;

                            @Override
                            public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                                int i = 0;
                                while (running) {
                                    ctx.collect(Tuple2.of(i++, getRuntimeContext().getIndexOfThisSubtask()));
                                    Thread.sleep(1);
                                }
                            }

                            @Override
                            public void cancel() {
                                running = false;
                            }
                        });

                kafkaServer.produceIntoKafka(fromGen, topic, new KeyedSerializationSchemaWrapper<>(schema),
                        standardProps, null);

                env1.execute("Metrics test job");
            } catch (Throwable t) {
                LOG.warn("Got exception during execution", t);
                if (!(t.getCause() instanceof JobCancellationException)) { // we'll cancel the job
                    error.f0 = t;
                }
            }
        }
    };
    Thread jobThread = new Thread(job);
    jobThread.start();

    try {
        // connect to JMX
        MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
        // wait until we've found all 5 offset metrics
        Set<ObjectName> offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
        while (offsetMetrics.size() < 5) { // test will time out if metrics are not properly working
            if (error.f0 != null) {
                // fail test early
                throw error.f0;
            }
            offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
            Thread.sleep(50);
        }
        Assert.assertEquals(5, offsetMetrics.size());

        // we can't rely on the consumer to have touched all the partitions already
        // that's why we'll wait until all five partitions have a positive offset.
        // The test will fail if we never meet the condition
        while (true) {
            int numPosOffsets = 0;
            // check that offsets are correctly reported
            for (ObjectName object : offsetMetrics) {
                Object offset = mBeanServer.getAttribute(object, "Value");
                if ((long) offset >= 0) {
                    numPosOffsets++;
                }
            }
            if (numPosOffsets == 5) {
                break;
            }
            // wait for the consumer to consume on all partitions
            Thread.sleep(50);
        }

        // check if producer metrics are also available.
        Set<ObjectName> producerMetrics = mBeanServer.queryNames(new ObjectName("*KafkaProducer*:*"), null);
        Assert.assertTrue("No producer metrics found", producerMetrics.size() > 30);

        LOG.info("Found all JMX metrics. Cancelling job.");
    } finally {
        // cancel
        JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));
    }

    while (jobThread.isAlive()) {
        Thread.sleep(50);
    }
    if (error.f0 != null) {
        throw error.f0;
    }

    deleteTestTopic(topic);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * This test ensures that when explicitly set to start from latest record, the consumer
 * ignores the "auto.offset.reset" behaviour as well as any committed group offsets in Kafka.
 */
public void runStartFromLatestOffsets() throws Exception {
    // 50 records written to each of 3 partitions before launching a latest-starting consuming job
    final int parallelism = 3;
    final int recordsInEachPartition = 50;

    // each partition will be written an extra 200 records
    final int extraRecordsInEachPartition = 200;

    // all already existing data in the topic, before the consuming topology has started, should be ignored
    final String topicName = writeSequence("testStartFromLatestOffsetsTopic", recordsInEachPartition,
            parallelism, 1);

    // the committed offsets should be ignored
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23);
    kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31);
    kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43);

    // job names for the topologies for writing and consuming the extra records
    final String consumeExtraRecordsJobName = "Consume Extra Records Job";
    final String writeExtraRecordsJobName = "Write Extra Records Job";

    // serialization / deserialization schemas for writing and consuming the extra records
    final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation
            .of(new TypeHint<Tuple2<Integer, Integer>>() {
            });

    final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema = new KeyedSerializationSchemaWrapper<>(
            new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

    final KeyedDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KeyedDeserializationSchemaWrapper<>(
            new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

    // setup and run the latest-consuming job
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);
    env.getConfig().disableSysoutLogging();
    env.setParallelism(parallelism);

    final Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "earliest"); // this should be ignored

    FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> latestReadingConsumer = kafkaServer.getConsumer(topicName,
            deserSchema, readProps);
    latestReadingConsumer.setStartFromLatest();

    env.addSource(latestReadingConsumer).setParallelism(parallelism)
            .flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Object>() {
                @Override
                public void flatMap(Tuple2<Integer, Integer> value, Collector<Object> out) throws Exception {
                    if (value.f1 - recordsInEachPartition < 0) {
                        throw new RuntimeException(
                                "test failed; consumed a record that was previously written: " + value);
                    }
                }
            }).setParallelism(1).addSink(new DiscardingSink<>());

    final AtomicReference<Throwable> error = new AtomicReference<>();
    Thread consumeThread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                env.execute(consumeExtraRecordsJobName);
            } catch (Throwable t) {
                if (!(t.getCause() instanceof JobCancellationException)) {
                    error.set(t);
                }
            }
        }
    });
    consumeThread.start();

    // wait until the consuming job has started, to be extra safe
    JobManagerCommunicationUtils.waitUntilJobIsRunning(flink.getLeaderGateway(timeout),
            consumeExtraRecordsJobName);

    // setup the extra records writing job
    final StreamExecutionEnvironment env2 = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);

    DataStream<Tuple2<Integer, Integer>> extraRecordsStream = env2
            .addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

                private boolean running = true;

                @Override
                public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                    int count = recordsInEachPartition; // the extra records should start from the last written value
                    int partition = getRuntimeContext().getIndexOfThisSubtask();

                    while (running && count < recordsInEachPartition + extraRecordsInEachPartition) {
                        ctx.collect(new Tuple2<>(partition, count));
                        count++;
                    }
                }

                @Override
                public void cancel() {
                    running = false;
                }
            }).setParallelism(parallelism);

    kafkaServer.produceIntoKafka(extraRecordsStream, topicName, serSchema, readProps, null);

    try {
        env2.execute(writeExtraRecordsJobName);
    } catch (Exception e) {
        throw new RuntimeException("Writing extra records failed", e);
    }

    // cancel the consume job after all extra records are written
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout), consumeExtraRecordsJobName);
    consumeThread.join();

    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);

    // check whether the consuming thread threw any test errors;
    // test will fail here if the consume job had incorrectly read any records other than the extra records
    final Throwable consumerError = error.get();
    if (consumerError != null) {
        throw new Exception("Exception in the consuming thread", consumerError);
    }
}