List of usage examples for java.util Properties putAll
@Override public synchronized void putAll(Map<?, ?> t)
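Properties.putAll copies every mapping from the argument map into the receiving Properties object, overwriting entries whose keys already exist. The examples below rely on this to layer several configuration sources (for instance standard and security-related Kafka settings) into one Properties instance. A minimal, self-contained sketch of that behaviour follows; the class name and property keys are illustrative only, not taken from the examples.

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class PutAllDemo {
    public static void main(String[] args) {
        Properties base = new Properties();
        base.setProperty("bootstrap.servers", "localhost:9092");
        base.setProperty("auto.offset.reset", "earliest");

        Map<String, String> overrides = new HashMap<>();
        overrides.put("auto.offset.reset", "latest"); // overwrites the existing entry
        overrides.put("group.id", "demo-group");      // adds a new entry

        // putAll copies every mapping from the argument, replacing keys that already exist
        base.putAll(overrides);

        System.out.println(base.getProperty("auto.offset.reset")); // latest
        System.out.println(base.getProperty("group.id"));          // demo-group
    }
}

Note that putAll accepts arbitrary objects, so entries with non-String keys or values become invisible to getProperty; the examples below only merge String-valued properties.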
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Tests that the source can be properly canceled when reading empty partitions.
 */
public void runCancelingOnEmptyInputTest() throws Exception {
    final String topic = "cancelingOnEmptyInputTopic";

    final int parallelism = 3;
    createTestTopic(topic, parallelism, 1);

    final AtomicReference<Throwable> error = new AtomicReference<>();

    final Runnable jobRunner = new Runnable() {
        @Override
        public void run() {
            try {
                final StreamExecutionEnvironment env = StreamExecutionEnvironment
                        .createRemoteEnvironment("localhost", flinkPort);
                env.setParallelism(parallelism);
                env.enableCheckpointing(100);
                env.getConfig().disableSysoutLogging();

                Properties props = new Properties();
                props.putAll(standardProps);
                props.putAll(secureProps);
                FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic,
                        new SimpleStringSchema(), props);

                env.addSource(source).addSink(new DiscardingSink<String>());

                env.execute("CancelingOnEmptyInputTest");
            } catch (Throwable t) {
                LOG.error("Job Runner failed with exception", t);
                error.set(t);
            }
        }
    };

    Thread runnerThread = new Thread(jobRunner, "program runner thread");
    runnerThread.start();

    // wait a bit before canceling
    Thread.sleep(2000);

    Throwable failureCause = error.get();
    if (failureCause != null) {
        failureCause.printStackTrace();
        Assert.fail("Test failed prematurely with: " + failureCause.getMessage());
    }

    // cancel
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    // wait for the program to be done and validate that we failed with the right exception
    runnerThread.join();

    failureCause = error.get();
    assertNotNull("program did not fail properly due to canceling", failureCause);
    assertTrue(failureCause.getMessage().contains("Job was cancelled"));

    deleteTestTopic(topic);
}
From source file:org.apache.falcon.oozie.OozieCoordinatorBuilder.java
protected Properties createCoordDefaultConfiguration(Cluster cluster, String coordName) throws FalconException {
    Properties props = new Properties();
    props.put(ARG.entityName.getPropName(), entity.getName());
    props.put(ARG.nominalTime.getPropName(), NOMINAL_TIME_EL);
    props.put(ARG.timeStamp.getPropName(), ACTUAL_TIME_EL);
    props.put("userBrokerUrl", ClusterHelper.getMessageBrokerUrl(cluster));
    props.put("userBrokerImplClass", ClusterHelper.getMessageBrokerImplClass(cluster));
    String falconBrokerUrl = StartupProperties.get().getProperty(ARG.brokerUrl.getPropName(),
            "tcp://localhost:61616?daemon=true");
    props.put(ARG.brokerUrl.getPropName(), falconBrokerUrl);
    String falconBrokerImplClass = StartupProperties.get().getProperty(ARG.brokerImplClass.getPropName(),
            ClusterHelper.DEFAULT_BROKER_IMPL_CLASS);
    props.put(ARG.brokerImplClass.getPropName(), falconBrokerImplClass);
    String jmsMessageTTL = StartupProperties.get().getProperty("broker.ttlInMins",
            DEFAULT_BROKER_MSG_TTL.toString());
    props.put(ARG.brokerTTL.getPropName(), jmsMessageTTL);
    props.put(ARG.entityType.getPropName(), entity.getEntityType().name());
    props.put("logDir", getStoragePath(new Path(EntityUtil.getBaseStagingPath(cluster, entity), "logs")));
    props.put(OozieClient.EXTERNAL_ID, new ExternalId(entity.getName(),
            EntityUtil.getWorkflowNameTag(coordName, entity), "${coord:nominalTime()}").getId());
    props.put("workflowEngineUrl", ClusterHelper.getOozieUrl(cluster));

    if (EntityUtil.getLateProcess(entity) == null || EntityUtil.getLateProcess(entity).getLateInputs() == null
            || EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) {
        props.put("shouldRecord", "false");
    } else {
        props.put("shouldRecord", "true");
    }

    props.put("entityName", entity.getName());
    props.put("entityType", entity.getEntityType().name().toLowerCase());
    props.put(ARG.cluster.getPropName(), cluster.getName());
    props.put(MR_QUEUE_NAME, "default");
    props.put(MR_JOB_PRIORITY, "NORMAL");

    // props in entity override the set props.
    props.putAll(getEntityProperties(entity));
    return props;
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * This test ensures that when the consumers retrieve some start offset from kafka (earliest, latest), that this offset
 * is committed to Kafka, even if some partitions are not read.
 *
 * Test:
 * - Create 3 partitions
 * - write 50 messages into each.
 * - Start three consumers with auto.offset.reset='latest' and wait until they committed into Kafka.
 * - Check if the offsets in Kafka are set to 50 for the three partitions
 *
 * See FLINK-3440 as well
 */
public void runAutoOffsetRetrievalAndCommitToKafka() throws Exception {
    // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition should be 50)
    final int parallelism = 3;
    final int recordsInEachPartition = 50;

    final String topicName = writeSequence("testAutoOffsetRetrievalAndCommitToKafkaTopic",
            recordsInEachPartition, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);
    env.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "latest"); // set to reset to latest, so that partitions are initially not read

    DataStream<String> stream = env
            .addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());

    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner = new Thread("runner") {
        @Override
        public void run() {
            try {
                env.execute();
            } catch (Throwable t) {
                if (!(t.getCause() instanceof JobCancellationException)) {
                    errorRef.set(t);
                }
            }
        }
    };
    runner.start();

    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();

    final Long l50 = 50L; // the final committed offset in Kafka should be 50
    final long deadline = 30_000_000_000L + System.nanoTime();
    do {
        Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);

        if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
            break;
        }

        Thread.sleep(100);
    } while (System.nanoTime() < deadline);

    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }

    // final check to see if offsets are correctly in Kafka
    Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
    Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
    Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    Assert.assertEquals(Long.valueOf(50L), o1);
    Assert.assertEquals(Long.valueOf(50L), o2);
    Assert.assertEquals(Long.valueOf(50L), o3);

    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
public void runKeyValueTest() throws Exception {
    final String topic = "keyvaluetest";
    createTestTopic(topic, 1, 1);
    final int ELEMENT_COUNT = 5000;

    // ----------- Write some data into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
        @Override
        public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
            Random rnd = new Random(1337);
            for (long i = 0; i < ELEMENT_COUNT; i++) {
                PojoValue pojo = new PojoValue();
                pojo.when = new Date(rnd.nextLong());
                pojo.lon = rnd.nextLong();
                pojo.lat = i;
                // make every second key null to ensure proper "null" serialization
                Long key = (i % 2 == 0) ? null : i;
                ctx.collect(new Tuple2<>(key, pojo));
            }
        }

        @Override
        public void cancel() {
        }
    });

    KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(
            Long.class, PojoValue.class, env.getConfig());
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "3");
    kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
    env.execute("Write KV to Kafka");

    // ----------- Read the data again -------------------

    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    KeyedDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(
            Long.class, PojoValue.class, env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Long, PojoValue>> fromKafka = env
            .addSource(kafkaServer.getConsumer(topic, readSchema, props));
    fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
        long counter = 0;

        @Override
        public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
            // the elements should be in order.
            Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
            if (value.f1.lat % 2 == 0) {
                assertNull("key was not null", value.f0);
            } else {
                Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
            }
            counter++;
            if (counter == ELEMENT_COUNT) {
                // we got the right number of elements
                throw new SuccessException();
            }
        }
    });

    tryExecute(env, "Read KV from Kafka");

    deleteTestTopic(topic);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Test producing and consuming into multiple topics
 *
 * @throws java.lang.Exception
 */
public void runProduceConsumeMultipleTopics() throws java.lang.Exception {
    final int NUM_TOPICS = 5;
    final int NUM_ELEMENTS = 20;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();

    // create topics with content
    final List<String> topics = new ArrayList<>();
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        topics.add(topic);
        // create topic
        createTestTopic(topic, i + 1 /*partitions*/, 1);
    }

    // run first job, producing into all topics
    DataStream<Tuple3<Integer, Integer, String>> stream = env
            .addSource(new RichParallelSourceFunction<Tuple3<Integer, Integer, String>>() {
                @Override
                public void run(SourceContext<Tuple3<Integer, Integer, String>> ctx) throws Exception {
                    int partition = getRuntimeContext().getIndexOfThisSubtask();

                    for (int topicId = 0; topicId < NUM_TOPICS; topicId++) {
                        for (int i = 0; i < NUM_ELEMENTS; i++) {
                            ctx.collect(new Tuple3<>(partition, i, "topic-" + topicId));
                        }
                    }
                }

                @Override
                public void cancel() {
                }
            });

    Tuple2WithTopicSchema schema = new Tuple2WithTopicSchema(env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    kafkaServer.produceIntoKafka(stream, "dummy", schema, props, null);

    env.execute("Write to topics");

    // run second job consuming from multiple topics
    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();

    stream = env.addSource(kafkaServer.getConsumer(topics, schema, props));

    stream.flatMap(new FlatMapFunction<Tuple3<Integer, Integer, String>, Integer>() {
        Map<String, Integer> countPerTopic = new HashMap<>(NUM_TOPICS);

        @Override
        public void flatMap(Tuple3<Integer, Integer, String> value, Collector<Integer> out) throws Exception {
            Integer count = countPerTopic.get(value.f2);
            if (count == null) {
                count = 1;
            } else {
                count++;
            }
            countPerTopic.put(value.f2, count);

            // check map:
            for (Map.Entry<String, Integer> el : countPerTopic.entrySet()) {
                if (el.getValue() < NUM_ELEMENTS) {
                    break; // not enough yet
                }
                if (el.getValue() > NUM_ELEMENTS) {
                    throw new RuntimeException("There is a failure in the test. I've read "
                            + el.getValue() + " from topic " + el.getKey());
                }
            }
            // we've seen messages from all topics
            throw new SuccessException();
        }
    }).setParallelism(1);

    tryExecute(env, "Count elements from the topics");

    // delete all topics again
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        deleteTestTopic(topic);
    }
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Tests that the source can be properly canceled when reading full partitions.
 */
public void runCancelingOnFullInputTest() throws Exception {
    final String topic = "cancelingOnFullTopic";

    final int parallelism = 3;
    createTestTopic(topic, parallelism, 1);

    // launch a producer thread
    DataGenerators.InfiniteStringsGenerator generator = new DataGenerators.InfiniteStringsGenerator(kafkaServer,
            topic);
    generator.start();

    // launch a consumer asynchronously
    final AtomicReference<Throwable> jobError = new AtomicReference<>();

    final Runnable jobRunner = new Runnable() {
        @Override
        public void run() {
            try {
                final StreamExecutionEnvironment env = StreamExecutionEnvironment
                        .createRemoteEnvironment("localhost", flinkPort);
                env.setParallelism(parallelism);
                env.enableCheckpointing(100);
                env.getConfig().disableSysoutLogging();

                Properties props = new Properties();
                props.putAll(standardProps);
                props.putAll(secureProps);
                FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic,
                        new SimpleStringSchema(), props);

                env.addSource(source).addSink(new DiscardingSink<String>());

                env.execute("Runner for CancelingOnFullInputTest");
            } catch (Throwable t) {
                jobError.set(t);
            }
        }
    };

    Thread runnerThread = new Thread(jobRunner, "program runner thread");
    runnerThread.start();

    // wait a bit before canceling
    Thread.sleep(2000);

    Throwable failureCause = jobError.get();
    if (failureCause != null) {
        failureCause.printStackTrace();
        Assert.fail("Test failed prematurely with: " + failureCause.getMessage());
    }

    // cancel
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout),
            "Runner for CancelingOnFullInputTest");

    // wait for the program to be done and validate that we failed with the right exception
    runnerThread.join();

    failureCause = jobError.get();
    assertNotNull("program did not fail properly due to canceling", failureCause);
    assertTrue(failureCause.getMessage().contains("Job was cancelled"));

    if (generator.isAlive()) {
        generator.shutdown();
        generator.join();
    } else {
        Throwable t = generator.getError();
        if (t != null) {
            t.printStackTrace();
            fail("Generator failed: " + t.getMessage());
        } else {
            fail("Generator failed with no exception");
        }
    }

    deleteTestTopic(topic);
}
From source file:net.ssehub.easy.instantiation.core.model.buildlangModel.BuildlangExecution.java
/**
 * Loads properties from <code>file</code> into <code>prop</code>, possibly overriding existing
 * properties.
 *
 * @param file the file name
 * @param prop the loaded properties (to be modified as a side effect)
 * @param os if not <b>null</b>, to be inserted after the last "." with a following ".". If the file
 *     does not exist, no exception is thrown.
 * @throws VilException in case of loading problems
 */
private void loadProperties(File file, Properties prop, String os) throws VilException {
    boolean loadFile = true;
    if (null != os) {
        String f = file.toString();
        int pos = f.lastIndexOf('.');
        if (pos > 0 && pos < f.length()) {
            f = f.substring(0, pos + 1) + os + "." + f.substring(pos + 1);
            file = new File(f);
            loadFile = file.exists();
        } else {
            loadFile = false;
        }
    }
    if (loadFile) {
        try {
            FileInputStream fis = new FileInputStream(file);
            Properties p = new Properties();
            p.load(fis);
            prop.putAll(p);
            fis.close();
            for (String key : prop.stringPropertyNames()) {
                String value = prop.getProperty(key);
                // Replace value
                try {
                    value = StringReplacer.substitute(value, new Resolver(environment), getExpressionParser(),
                            this, null);
                } catch (VilException e) {
                    EASyLoggerFactory.INSTANCE.getLogger(getClass(), Bundle.ID).exception(e);
                }
                prop.setProperty(key, value);
            }
        } catch (IOException e) {
            throw new VilException(e.getMessage(), e, VilException.ID_IO);
        }
    }
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Test Flink's Kafka integration also with very big records (30MB)
 * see http://stackoverflow.com/questions/21020347/kafka-sending-a-15mb-message
 */
public void runBigRecordTestTopology() throws Exception {

    final String topic = "bigRecordTestTopic";
    final int parallelism = 1; // otherwise, the kafka mini clusters may run out of heap space

    createTestTopic(topic, parallelism, 1);

    final TypeInformation<Tuple2<Long, byte[]>> longBytesInfo = TypeInfoParser.parse("Tuple2<Long, byte[]>");

    final TypeInformationSerializationSchema<Tuple2<Long, byte[]>> serSchema = new TypeInformationSerializationSchema<>(
            longBytesInfo, new ExecutionConfig());

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.enableCheckpointing(100);
    env.setParallelism(parallelism);

    // add consuming topology:
    Properties consumerProps = new Properties();
    consumerProps.putAll(standardProps);
    consumerProps.setProperty("fetch.message.max.bytes", Integer.toString(1024 * 1024 * 14));
    consumerProps.setProperty("max.partition.fetch.bytes", Integer.toString(1024 * 1024 * 14)); // for the new fetcher
    consumerProps.setProperty("queued.max.message.chunks", "1");
    consumerProps.putAll(secureProps);

    FlinkKafkaConsumerBase<Tuple2<Long, byte[]>> source = kafkaServer.getConsumer(topic, serSchema,
            consumerProps);
    DataStreamSource<Tuple2<Long, byte[]>> consuming = env.addSource(source);

    consuming.addSink(new SinkFunction<Tuple2<Long, byte[]>>() {

        private int elCnt = 0;

        @Override
        public void invoke(Tuple2<Long, byte[]> value) throws Exception {
            elCnt++;
            if (value.f0 == -1) {
                // we should have seen 11 elements now.
                if (elCnt == 11) {
                    throw new SuccessException();
                } else {
                    throw new RuntimeException("There have been " + elCnt + " elements");
                }
            }
            if (elCnt > 10) {
                throw new RuntimeException("More than 10 elements seen: " + elCnt);
            }
        }
    });

    // add producing topology
    Properties producerProps = new Properties();
    producerProps.setProperty("max.request.size", Integer.toString(1024 * 1024 * 15));
    producerProps.setProperty("retries", "3");
    producerProps.putAll(secureProps);
    producerProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerConnectionStrings);

    DataStream<Tuple2<Long, byte[]>> stream = env.addSource(new RichSourceFunction<Tuple2<Long, byte[]>>() {

        private boolean running;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            running = true;
        }

        @Override
        public void run(SourceContext<Tuple2<Long, byte[]>> ctx) throws Exception {
            Random rnd = new Random();
            long cnt = 0;
            int sevenMb = 1024 * 1024 * 7;

            while (running) {
                byte[] wl = new byte[sevenMb + rnd.nextInt(sevenMb)];
                ctx.collect(new Tuple2<>(cnt++, wl));

                Thread.sleep(100);

                if (cnt == 10) {
                    // signal end
                    ctx.collect(new Tuple2<>(-1L, new byte[] { 1 }));
                    break;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), producerProps,
            null);

    tryExecute(env, "big topology test");
    deleteTestTopic(topic);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Test metrics reporting for consumer
 *
 * @throws Exception
 */
public void runMetricsTest() throws Throwable {

    // create a stream with 5 topics
    final String topic = "metricsStream";
    createTestTopic(topic, 5, 1);

    final Tuple1<Throwable> error = new Tuple1<>(null);
    Runnable job = new Runnable() {
        @Override
        public void run() {
            try {
                // start job writing & reading data.
                final StreamExecutionEnvironment env1 = StreamExecutionEnvironment
                        .createRemoteEnvironment("localhost", flinkPort);
                env1.setParallelism(1);
                env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
                env1.getConfig().disableSysoutLogging();
                env1.disableOperatorChaining(); // let the source read everything into the network buffers

                Properties props = new Properties();
                props.putAll(standardProps);
                props.putAll(secureProps);

                TypeInformationSerializationSchema<Tuple2<Integer, Integer>> schema = new TypeInformationSerializationSchema<>(
                        TypeInfoParser.<Tuple2<Integer, Integer>>parse("Tuple2<Integer, Integer>"),
                        env1.getConfig());
                DataStream<Tuple2<Integer, Integer>> fromKafka = env1
                        .addSource(kafkaServer.getConsumer(topic, schema, standardProps));
                fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {
                    @Override
                    public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
                        // no op
                    }
                });

                DataStream<Tuple2<Integer, Integer>> fromGen = env1
                        .addSource(new RichSourceFunction<Tuple2<Integer, Integer>>() {
                            boolean running = true;

                            @Override
                            public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                                int i = 0;
                                while (running) {
                                    ctx.collect(Tuple2.of(i++, getRuntimeContext().getIndexOfThisSubtask()));
                                    Thread.sleep(1);
                                }
                            }

                            @Override
                            public void cancel() {
                                running = false;
                            }
                        });

                kafkaServer.produceIntoKafka(fromGen, topic, new KeyedSerializationSchemaWrapper<>(schema),
                        standardProps, null);

                env1.execute("Metrics test job");
            } catch (Throwable t) {
                LOG.warn("Got exception during execution", t);
                if (!(t.getCause() instanceof JobCancellationException)) { // we'll cancel the job
                    error.f0 = t;
                }
            }
        }
    };
    Thread jobThread = new Thread(job);
    jobThread.start();

    try {
        // connect to JMX
        MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
        // wait until we've found all 5 offset metrics
        Set<ObjectName> offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
        while (offsetMetrics.size() < 5) { // test will time out if metrics are not properly working
            if (error.f0 != null) {
                // fail test early
                throw error.f0;
            }
            offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
            Thread.sleep(50);
        }
        Assert.assertEquals(5, offsetMetrics.size());

        // we can't rely on the consumer to have touched all the partitions already
        // that's why we'll wait until all five partitions have a positive offset.
        // The test will fail if we never meet the condition
        while (true) {
            int numPosOffsets = 0;
            // check that offsets are correctly reported
            for (ObjectName object : offsetMetrics) {
                Object offset = mBeanServer.getAttribute(object, "Value");
                if ((long) offset >= 0) {
                    numPosOffsets++;
                }
            }
            if (numPosOffsets == 5) {
                break;
            }
            // wait for the consumer to consume on all partitions
            Thread.sleep(50);
        }

        // check if producer metrics are also available.
        Set<ObjectName> producerMetrics = mBeanServer.queryNames(new ObjectName("*KafkaProducer*:*"), null);
        Assert.assertTrue("No producer metrics found", producerMetrics.size() > 30);

        LOG.info("Found all JMX metrics. Cancelling job.");
    } finally {
        // cancel
        JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));
    }

    while (jobThread.isAlive()) {
        Thread.sleep(50);
    }
    if (error.f0 != null) {
        throw error.f0;
    }

    deleteTestTopic(topic);
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * This test ensures that when explicitly set to start from latest record, the consumer
 * ignores the "auto.offset.reset" behaviour as well as any committed group offsets in Kafka.
 */
public void runStartFromLatestOffsets() throws Exception {
    // 50 records written to each of 3 partitions before launching a latest-starting consuming job
    final int parallelism = 3;
    final int recordsInEachPartition = 50;

    // each partition will be written an extra 200 records
    final int extraRecordsInEachPartition = 200;

    // all already existing data in the topic, before the consuming topology has started, should be ignored
    final String topicName = writeSequence("testStartFromLatestOffsetsTopic", recordsInEachPartition,
            parallelism, 1);

    // the committed offsets should be ignored
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23);
    kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31);
    kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43);

    // job names for the topologies for writing and consuming the extra records
    final String consumeExtraRecordsJobName = "Consume Extra Records Job";
    final String writeExtraRecordsJobName = "Write Extra Records Job";

    // serialization / deserialization schemas for writing and consuming the extra records
    final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation
            .of(new TypeHint<Tuple2<Integer, Integer>>() {
            });

    final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema = new KeyedSerializationSchemaWrapper<>(
            new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

    final KeyedDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KeyedDeserializationSchemaWrapper<>(
            new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

    // setup and run the latest-consuming job
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);
    env.getConfig().disableSysoutLogging();
    env.setParallelism(parallelism);

    final Properties readProps = new Properties();
    readProps.putAll(standardProps);
    readProps.setProperty("auto.offset.reset", "earliest"); // this should be ignored

    FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> latestReadingConsumer = kafkaServer.getConsumer(topicName,
            deserSchema, readProps);
    latestReadingConsumer.setStartFromLatest();

    env.addSource(latestReadingConsumer).setParallelism(parallelism)
            .flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Object>() {
                @Override
                public void flatMap(Tuple2<Integer, Integer> value, Collector<Object> out) throws Exception {
                    if (value.f1 - recordsInEachPartition < 0) {
                        throw new RuntimeException(
                                "test failed; consumed a record that was previously written: " + value);
                    }
                }
            }).setParallelism(1).addSink(new DiscardingSink<>());

    final AtomicReference<Throwable> error = new AtomicReference<>();
    Thread consumeThread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                env.execute(consumeExtraRecordsJobName);
            } catch (Throwable t) {
                if (!(t.getCause() instanceof JobCancellationException)) {
                    error.set(t);
                }
            }
        }
    });
    consumeThread.start();

    // wait until the consuming job has started, to be extra safe
    JobManagerCommunicationUtils.waitUntilJobIsRunning(flink.getLeaderGateway(timeout),
            consumeExtraRecordsJobName);

    // setup the extra records writing job
    final StreamExecutionEnvironment env2 = StreamExecutionEnvironment.createRemoteEnvironment("localhost",
            flinkPort);

    DataStream<Tuple2<Integer, Integer>> extraRecordsStream = env2
            .addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

                private boolean running = true;

                @Override
                public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                    int count = recordsInEachPartition; // the extra records should start from the last written value
                    int partition = getRuntimeContext().getIndexOfThisSubtask();

                    while (running && count < recordsInEachPartition + extraRecordsInEachPartition) {
                        ctx.collect(new Tuple2<>(partition, count));
                        count++;
                    }
                }

                @Override
                public void cancel() {
                    running = false;
                }
            }).setParallelism(parallelism);

    kafkaServer.produceIntoKafka(extraRecordsStream, topicName, serSchema, readProps, null);

    try {
        env2.execute(writeExtraRecordsJobName);
    } catch (Exception e) {
        throw new RuntimeException("Writing extra records failed", e);
    }

    // cancel the consume job after all extra records are written
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout), consumeExtraRecordsJobName);
    consumeThread.join();

    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);

    // check whether the consuming thread threw any test errors;
    // test will fail here if the consume job had incorrectly read any records other than the extra records
    final Throwable consumerError = error.get();
    if (consumerError != null) {
        throw new Exception("Exception in the consuming thread", consumerError);
    }
}