org.apache.flink.streaming.connectors.kinesis.manualtests.ManualExactlyOnceTest.java Source code

Introduction

Here is the source code for org.apache.flink.streaming.connectors.kinesis.manualtests.ManualExactlyOnceTest.java, a manual end-to-end test of the exactly-once guarantee of Flink's Kinesis connector.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flink.streaming.connectors.kinesis.manualtests;

import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.kinesis.AmazonKinesisClient;
import com.amazonaws.services.kinesis.model.DescribeStreamResult;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.checkpoint.Checkpointed;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisProducer;
import org.apache.flink.streaming.connectors.kinesis.config.InitialPosition;
import org.apache.flink.streaming.connectors.kinesis.config.KinesisConfigConstants;
import org.apache.flink.streaming.connectors.kinesis.util.AWSUtil;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
import org.apache.flink.test.util.ForkableFlinkMiniCluster;
import org.apache.flink.test.util.SuccessException;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.BitSet;
import java.util.Properties;
import java.util.UUID;

import static org.apache.flink.test.util.TestUtils.tryExecute;

/**
 * This test first starts a data generator that produces events into Kinesis.
 * It then starts a consuming topology and verifies that all records up to a
 * certain point are observed exactly once, even though the consumer is forced
 * to fail and restart partway through.
 *
 * Invocation:
 * --region eu-central-1 --accessKey XXXXXXXXXXXX --secretKey XXXXXXXXXXXXXXXX
 */
public class ManualExactlyOnceTest {

    private static final Logger LOG = LoggerFactory.getLogger(ManualExactlyOnceTest.class);

    static final long TOTAL_EVENT_COUNT = 1000; // the producer writes one record every 10 ms, so it runs for about 10 seconds
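
    // What the test asserts: the consuming topology is forced to fail and restart
    // once, and the checkpointed validating mapper further below verifies that
    // after recovery every generated id is still seen exactly once.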

    public static void main(String[] args) throws Exception {
        final ParameterTool pt = ParameterTool.fromArgs(args);
        LOG.info("Starting exactly once test");

        // create a stream for the test:
        Properties configProps = new Properties();
        configProps.setProperty(KinesisConfigConstants.CONFIG_AWS_CREDENTIALS_PROVIDER_BASIC_ACCESSKEYID,
                pt.getRequired("accessKey"));
        configProps.setProperty(KinesisConfigConstants.CONFIG_AWS_CREDENTIALS_PROVIDER_BASIC_SECRETKEY,
                pt.getRequired("secretKey"));
        AmazonKinesisClient client = new AmazonKinesisClient(
                AWSUtil.getCredentialsProvider(configProps).getCredentials());
        client.setRegion(Region.getRegion(Regions.fromName(pt.getRequired("region"))));
        final String streamName = "flink-test-" + UUID.randomUUID();
        client.createStream(streamName, 1);
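        // one shard is sufficient: the producer below pins all records to a single
        // partition key, and a single consumer subtask will read the whole stream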
        // createStream is asynchronous: poll describeStream until the stream reports ACTIVE
        DescribeStreamResult status = client.describeStream(streamName);
        LOG.info("Status of stream {}", status);
        while (!status.getStreamDescription().getStreamStatus().equals("ACTIVE")) {
            status = client.describeStream(streamName);
            LOG.info("Status of stream {}", status);
            Thread.sleep(1000);
        }
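        // the stream is now ACTIVE; it is deleted again in the finally block below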

        final Configuration flinkConfig = new Configuration();
        flinkConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
        flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 16);
        flinkConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "0 s");

        ForkableFlinkMiniCluster flink = new ForkableFlinkMiniCluster(flinkConfig, false);
        flink.start();
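        // a single task manager with 8 slots hosts both the producing and the
        // consuming topology (each running at parallelism 2) in one local cluster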

        final int flinkPort = flink.getLeaderRPCPort();

        try {
            final Tuple1<Throwable> producerException = new Tuple1<>();
            Runnable producer = new Runnable() {
                @Override
                public void run() {
                    try {
                        StreamExecutionEnvironment see = StreamExecutionEnvironment
                                .createRemoteEnvironment("localhost", flinkPort, flinkConfig);
                        see.setParallelism(2);

                        // start data generator
                        DataStream<String> simpleStringStream = see
                                .addSource(new EventsGenerator(TOTAL_EVENT_COUNT)).setParallelism(1);

                        Properties producerProps = new Properties();
                        producerProps.setProperty(
                                KinesisConfigConstants.CONFIG_AWS_CREDENTIALS_PROVIDER_BASIC_ACCESSKEYID,
                                pt.getRequired("accessKey"));
                        producerProps.setProperty(
                                KinesisConfigConstants.CONFIG_AWS_CREDENTIALS_PROVIDER_BASIC_SECRETKEY,
                                pt.getRequired("secretKey"));
                        producerProps.setProperty(KinesisConfigConstants.CONFIG_AWS_REGION,
                                pt.getRequired("region"));

                        FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(new SimpleStringSchema(),
                                producerProps);

                        kinesis.setFailOnError(true);
                        kinesis.setDefaultStream(streamName);
                        kinesis.setDefaultPartition("0");
                        simpleStringStream.addSink(kinesis);
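                        // a fixed partition key routes every record to the stream's single
                        // shard, and failOnError surfaces producer-side errors as job
                        // failures instead of only logging them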

                        LOG.info("Starting producing topology");
                        see.execute("Producing topology");
                        LOG.info("Producing topo finished");
                    } catch (Exception e) {
                        LOG.warn("Error while running producing topology", e);
                        producerException.f0 = e;
                    }
                }
            };
            Thread producerThread = new Thread(producer);
            producerThread.start();

            final Tuple1<Throwable> consumerException = new Tuple1<>();
            Runnable consumer = new Runnable() {
                @Override
                public void run() {
                    try {
                        StreamExecutionEnvironment see = StreamExecutionEnvironment
                                .createRemoteEnvironment("localhost", flinkPort, flinkConfig);
                        see.setParallelism(2);
                        see.enableCheckpointing(500);
                        // the artificial failure below is thrown only on the first attempt,
                        // so a single restart (with a 500 ms delay) is enough
                        see.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 500L));
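                        // with the 500 ms checkpoint interval above, only the records consumed
                        // since the last completed checkpoint are replayed after the failure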

                        // consuming topology
                        Properties consumerProps = new Properties();
                        consumerProps.setProperty(
                                KinesisConfigConstants.CONFIG_AWS_CREDENTIALS_PROVIDER_BASIC_ACCESSKEYID,
                                pt.getRequired("accessKey"));
                        consumerProps.setProperty(
                                KinesisConfigConstants.CONFIG_AWS_CREDENTIALS_PROVIDER_BASIC_SECRETKEY,
                                pt.getRequired("secretKey"));
                        consumerProps.setProperty(KinesisConfigConstants.CONFIG_AWS_REGION,
                                pt.getRequired("region"));
                        // start reading from beginning
                        consumerProps.setProperty(KinesisConfigConstants.CONFIG_STREAM_INIT_POSITION_TYPE,
                                InitialPosition.TRIM_HORIZON.name());
                        DataStream<String> consuming = see.addSource(
                                new FlinkKinesisConsumer<>(streamName, new SimpleStringSchema(), consumerProps));
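                        // failure injection: the mapper below throws once roughly 200 records
                        // have been seen, but only on the first execution attempt; the job then
                        // restarts and restores the validator's checkpointed state, which is
                        // exactly the situation the exactly-once guarantee has to survive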
                        consuming.flatMap(new RichFlatMapFunction<String, String>() {
                            int count = 0;

                            @Override
                            public void flatMap(String value, Collector<String> out) throws Exception {
                                if (count++ >= 200 && getRuntimeContext().getAttemptNumber() == 0) {
                                    throw new RuntimeException("Artificial failure. Restart pls");
                                }
                                out.collect(value);
                            }
                        }).flatMap(new ExactlyOnceValidatingMapper());
                        // validate consumed records for correctness
                        LOG.info("Starting consuming topology");
                        tryExecute(see, "Consuming topo");
                        LOG.info("Consuming topo finished");
                    } catch (Exception e) {
                        LOG.warn("Error while running consuming topology", e);
                        consumerException.f0 = e;
                    }
                }
            };

            Thread consumerThread = new Thread(consumer);
            consumerThread.start();

            long deadline = System.currentTimeMillis() + (1000 * 2 * 60); // wait at most for two minutes
            while (consumerThread.isAlive() || producerThread.isAlive()) {
                Thread.sleep(1000);
                if (System.currentTimeMillis() >= deadline) {
                    LOG.warn("Deadline passed");
                    break; // enough waiting
                }
            }

            if (producerThread.isAlive()) {
                producerThread.interrupt();
            }

            if (consumerThread.isAlive()) {
                consumerThread.interrupt();
            }

            if (producerException.f0 != null) {
                throw new RuntimeException("Producer failed", producerException.f0);
            }
            if (consumerException.f0 != null) {
                throw new RuntimeException("Consumer failed", consumerException.f0);
            }

            LOG.info("+++ TEST passed! +++");

        } finally {
            client.deleteStream(streamName);
            client.shutdown();

            // stopping flink
            flink.stop();
        }
    }

    // validates that every id in [0, TOTAL_EVENT_COUNT) is observed exactly once
    private static class ExactlyOnceValidatingMapper
            implements FlatMapFunction<String, String>, Checkpointed<BitSet> {
        BitSet validator = new BitSet((int) TOTAL_EVENT_COUNT);
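        // the BitSet is the operator's checkpointed state: snapshotState/restoreState
        // hand it to Flink, so after the induced restart the mapper still remembers
        // which ids it has already seen and can detect duplicates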

        @Override
        public void flatMap(String value, Collector<String> out) throws Exception {
            LOG.info("Consumed {}", value);

            int id = Integer.parseInt(value.split("-")[0]);
            if (id >= TOTAL_EVENT_COUNT) {
                throw new RuntimeException("Out of bounds ID observed");
            }
            if (validator.get(id)) {
                throw new RuntimeException("Saw id " + id + " twice!");
            }
            validator.set(id);

            if (validator.nextClearBit(0) == TOTAL_EVENT_COUNT) { // all ids 0 .. TOTAL_EVENT_COUNT-1 are set
                throw new SuccessException();
            }
        }

        @Override
        public BitSet snapshotState(long checkpointId, long checkpointTimestamp) throws Exception {
            return validator;
        }

        @Override
        public void restoreState(BitSet state) throws Exception {
            this.validator = state;
        }
    }

    public static class EventsGenerator implements SourceFunction<String> {
        private boolean running = true;
        private final long limit;

        public EventsGenerator(long limit) {
            this.limit = limit;
        }

        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            long seq = 0;
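            // emit events of the form "<sequence number>-<random alphabetic suffix>",
            // roughly one every 10 ms, until `limit` events have been produced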
            while (running) {
                Thread.sleep(10);
                String evt = (seq++) + "-" + RandomStringUtils.randomAlphabetic(12);
                ctx.collect(evt);
                LOG.info("Emitting event {}", evt);
                if (seq >= limit) {
                    break;
                }
            }
            ctx.close();
            LOG.info("Stopping events generator");
        }

        @Override
        public void cancel() {
            running = false;
        }
    }
}
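
For reference, the test can also be launched from another Java program instead of the command line shown in the Javadoc. A minimal sketch (the class name RunManualExactlyOnceTest and the credential values are placeholders; it assumes the test class and its dependencies are on the classpath):

public class RunManualExactlyOnceTest {
    public static void main(String[] args) throws Exception {
        // equivalent to: --region eu-central-1 --accessKey ... --secretKey ...
        ManualExactlyOnceTest.main(new String[] {
                "--region", "eu-central-1",
                "--accessKey", "XXXXXXXXXXXX",
                "--secretKey", "XXXXXXXXXXXXXXXX"
        });
    }
}

Keep in mind that the test creates a real (billable) Kinesis stream; it deletes the stream again in its finally block, but an interrupted run may leave the stream behind.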