org.apache.flink.streaming.runtime.operators.windowing.NonKeyedWindowOperator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.flink.streaming.runtime.operators.windowing.NonKeyedWindowOperator.java

Source

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.runtime.operators.windowing;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang.SerializationUtils;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.state.OperatorState;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.typeutils.InputTypeConfigurable;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.runtime.state.StateBackend;
import org.apache.flink.runtime.state.StateHandle;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator;
import org.apache.flink.streaming.api.operators.ChainingStrategy;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.operators.OutputTypeConfigurable;
import org.apache.flink.streaming.api.operators.TimestampedCollector;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner;
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.windows.Window;
import org.apache.flink.streaming.runtime.operators.Triggerable;
import org.apache.flink.streaming.runtime.operators.windowing.buffers.WindowBuffer;
import org.apache.flink.streaming.runtime.operators.windowing.buffers.WindowBufferFactory;
import org.apache.flink.streaming.runtime.streamrecord.MultiplexingStreamRecordSerializer;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.runtime.tasks.StreamTaskState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import static java.util.Objects.requireNonNull;

/**
 * Window operator for non-keyed windows.
 *
 * @see org.apache.flink.streaming.runtime.operators.windowing.WindowOperator
 *
 * @param <IN> The type of the incoming elements.
 * @param <OUT> The type of elements emitted by the {@code WindowFunction}.
 * @param <W> The type of {@code Window} that the {@code WindowAssigner} assigns.
 */
public class NonKeyedWindowOperator<IN, OUT, W extends Window>
        extends AbstractUdfStreamOperator<OUT, AllWindowFunction<IN, OUT, W>> implements
        OneInputStreamOperator<IN, OUT>, Triggerable, InputTypeConfigurable, OutputTypeConfigurable<OUT> {

    private static final long serialVersionUID = 1L;

    /** Logger for this operator, bound to this class so that log output is attributed correctly. */
    private static final Logger LOG = LoggerFactory.getLogger(NonKeyedWindowOperator.class);

    // ------------------------------------------------------------------------
    // Configuration values and stuff from the user
    // ------------------------------------------------------------------------

    private final WindowAssigner<? super IN, W> windowAssigner;

    private final Trigger<? super IN, ? super W> trigger;

    private final WindowBufferFactory<? super IN, ? extends WindowBuffer<IN>> windowBufferFactory;

    /**
     * If this is true, the current processing time is set as the timestamp of incoming elements.
     * This is for use with a {@link org.apache.flink.streaming.api.windowing.evictors.TimeEvictor}
     * if eviction should happen based on processing time.
     */
    private boolean setProcessingTime = false;

    /**
     * This is used to copy the incoming element because it can be put into several window
     * buffers.
     */
    private TypeSerializer<IN> inputSerializer;

    /**
     * For serializing the window in checkpoints.
     */
    private final TypeSerializer<W> windowSerializer;

    // ------------------------------------------------------------------------
    // State that is not checkpointed
    // ------------------------------------------------------------------------

    /**
     * Processing time timers that are currently in-flight.
     */
    private transient Map<Long, Set<Context>> processingTimeTimers;

    /**
     * Current waiting watermark callbacks.
     */
    private transient Map<Long, Set<Context>> watermarkTimers;

    /**
     * This is given to the {@code WindowFunction} for emitting elements with a given timestamp.
     */
    protected transient TimestampedCollector<OUT> timestampedCollector;

    /**
     * To keep track of the current watermark so that we can immediately fire if a trigger
     * registers an event time callback for a timestamp that lies in the past.
     */
    protected transient long currentWatermark = -1L;

    // ------------------------------------------------------------------------
    // State that needs to be checkpointed
    // ------------------------------------------------------------------------

    /**
     * The windows (panes) that are currently in-flight. Each pane has a {@code WindowBuffer}
     * and a {@code TriggerContext} that stores the {@code Trigger} for that pane.
     */
    protected transient Map<W, Context> windows;

    /**
     * Creates a new {@code NonKeyedWindowOperator} from the given policies and user function.
     *
     * @param windowAssigner assigns incoming elements to windows; must not be {@code null}
     * @param windowSerializer serializer used to write and read windows in checkpoints
     * @param windowBufferFactory creates the {@code WindowBuffer} of each new pane; must not be {@code null}
     * @param windowFunction user function that is applied to the contents of a window when it is emitted
     * @param trigger consulted on elements and timers to decide whether a window fires and/or is purged;
     *                must not be {@code null}
     */
    public NonKeyedWindowOperator(
            WindowAssigner<? super IN, W> windowAssigner,
            TypeSerializer<W> windowSerializer,
            WindowBufferFactory<? super IN, ? extends WindowBuffer<IN>> windowBufferFactory,
            AllWindowFunction<IN, OUT, W> windowFunction,
            Trigger<? super IN, ? super W> trigger) {

        super(windowFunction);

        // null checks happen in this order: assigner, buffer factory, trigger
        this.windowAssigner = requireNonNull(windowAssigner);
        this.windowBufferFactory = requireNonNull(windowBufferFactory);
        this.trigger = requireNonNull(trigger);

        // the window serializer is stored as given, without a null check
        this.windowSerializer = windowSerializer;

        setChainingStrategy(ChainingStrategy.ALWAYS);
    }

    /**
     * Java serialization hook. Restores the non-transient fields and then resets the transient
     * {@code currentWatermark} to {@code -1}, because field initializers are not re-run when an
     * object is deserialized.
     *
     * @param in the stream this operator is being read from
     * @throws IOException if reading from the stream fails
     * @throws ClassNotFoundException if a class of a serialized field cannot be resolved
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        this.currentWatermark = -1L;
    }

    /**
     * Creates the input serializer from the given type information. {@link #open()} refuses to
     * start if this has not been called.
     *
     * @param type type information of the incoming elements
     * @param executionConfig configuration handed to the serializer factory
     */
    @Override
    @SuppressWarnings("unchecked")
    public final void setInputType(TypeInformation<?> type, ExecutionConfig executionConfig) {
        TypeSerializer<?> created = type.createSerializer(executionConfig);
        this.inputSerializer = (TypeSerializer<IN>) created;
    }

    /**
     * Prepares the operator for processing: creates the output collector, opens the window
     * buffer factory, initializes the bookkeeping maps unless {@code restoreState()} already
     * did so, and rebuilds the timer indexes from the timers stored in the existing window
     * contexts.
     *
     * @throws IllegalStateException if no input serializer has been set
     * @throws Exception if the superclass or the window buffer factory fails to open
     */
    @Override
    public final void open() throws Exception {
        super.open();
        timestampedCollector = new TimestampedCollector<>(output);

        if (inputSerializer == null) {
            throw new IllegalStateException("Input serializer was not set.");
        }

        windowBufferFactory.setRuntimeContext(getRuntimeContext());
        windowBufferFactory.open(getUserFunctionParameters());

        // any of these may already have been populated by restoreState()
        if (watermarkTimers == null) {
            watermarkTimers = new HashMap<>();
        }
        if (processingTimeTimers == null) {
            processingTimeTimers = new HashMap<>();
        }
        if (windows == null) {
            windows = new HashMap<>();
        }

        // rebuild the timer -> contexts indexes from the timers recorded in each context
        for (Context pane : windows.values()) {
            final long processingTime = pane.processingTimeTimer;
            if (processingTime > 0) {
                Set<Context> waiting = processingTimeTimers.get(processingTime);
                if (waiting == null) {
                    // first context for this timestamp: also register the timer with the runtime
                    getRuntimeContext().registerTimer(processingTime, NonKeyedWindowOperator.this);
                    waiting = new HashSet<>();
                    processingTimeTimers.put(processingTime, waiting);
                }
                waiting.add(pane);
            }

            final long eventTime = pane.watermarkTimer;
            if (eventTime > 0) {
                Set<Context> waiting = watermarkTimers.get(eventTime);
                if (waiting == null) {
                    waiting = new HashSet<>();
                    watermarkTimers.put(eventTime, waiting);
                }
                waiting.add(pane);
            }
        }
    }

    /**
     * Emits the contents of all windows that are still in flight and then shuts the operator
     * down.
     *
     * <p>The remaining windows are flushed <em>before</em> {@code super.close()} is called, so
     * the user function is not applied after it has been closed.
     * NOTE(review): this relies on {@code super.close()} closing the wrapped user function, as
     * {@code AbstractUdfStreamOperator} does - confirm against the Flink version in use.
     *
     * @throws Exception if emitting a window or closing the function or buffer factory fails
     */
    @Override
    public final void close() throws Exception {
        // emit the elements that we still keep, while the window function is still open
        for (Context pane : windows.values()) {
            emitWindow(pane);
        }
        windows.clear();

        super.close();
        windowBufferFactory.close();
    }

    /**
     * Adds the element to the buffer of every window it is assigned to, creating the window's
     * context on first use, and acts on the trigger's decision for each of those windows.
     *
     * <p>If {@code setProcessingTime} is enabled, the element's timestamp is first replaced by
     * the current system time.
     *
     * @param element the incoming record
     * @throws Exception if the trigger or the window function fails
     */
    @Override
    @SuppressWarnings("unchecked")
    public final void processElement(StreamRecord<IN> element) throws Exception {
        if (setProcessingTime) {
            element.replace(element.getValue(), System.currentTimeMillis());
        }

        final Collection<W> assigned = windowAssigner.assignWindows(element.getValue(), element.getTimestamp());

        for (W window : assigned) {
            Context pane = windows.get(window);
            if (pane == null) {
                // first element for this window: set up its buffer and context
                pane = new Context(window, windowBufferFactory.create());
                windows.put(window, pane);
            }
            pane.windowBuffer.storeElement(element);
            processTriggerResult(pane.onElement(element), window);
        }
    }

    /**
     * Applies the user function to the buffered elements of the given window context.
     * The collector's timestamp is set to the window's maximum timestamp in every case;
     * the user function is only invoked when the buffer is non-empty.
     *
     * @param context the window context to emit
     * @throws Exception if the user function fails
     */
    protected void emitWindow(Context context) throws Exception {
        timestampedCollector.setTimestamp(context.window.maxTimestamp());

        if (context.windowBuffer.size() <= 0) {
            // nothing buffered, nothing to emit
            return;
        }
        userFunction.apply(context.window, context.windowBuffer.getUnpackedElements(), timestampedCollector);
    }

    /**
     * Acts on a trigger decision for the given window: a purge removes the window's context
     * from the set of in-flight windows, a fire emits its contents. A result that neither
     * fires nor purges is ignored.
     *
     * @param triggerResult the decision returned by the trigger
     * @param window the window the decision applies to
     * @throws Exception if emitting the window fails
     */
    private void processTriggerResult(Trigger.TriggerResult triggerResult, W window) throws Exception {
        final boolean fire = triggerResult.isFire();
        final boolean purge = triggerResult.isPurge();

        if (!(fire || purge)) {
            // do nothing
            return;
        }

        // purging drops the context from the map; otherwise it stays registered
        final Context context = purge ? windows.remove(window) : windows.get(window);
        if (context == null) {
            LOG.debug("Window {} already gone.", window);
            return;
        }

        if (fire) {
            emitWindow(context);
        }
    }

    /**
     * Fires all event-time timers that are due at the given watermark, forwards the watermark
     * downstream and then records it as the current watermark.
     *
     * @param mark the watermark that arrived
     * @throws Exception if a trigger or the window function fails
     */
    @Override
    public final void processWatermark(Watermark mark) throws Exception {
        final long watermark = mark.getTimestamp();

        Set<Long> expiredTimes = new HashSet<>();
        Set<Context> dueContexts = new HashSet<>();

        // Collect first, fire afterwards: trigger methods may register new timers, which would
        // modify watermarkTimers while we iterate over it.
        for (Map.Entry<Long, Set<Context>> entry : watermarkTimers.entrySet()) {
            if (entry.getKey() > watermark) {
                continue;
            }
            dueContexts.addAll(entry.getValue());
            expiredTimes.add(entry.getKey());
        }

        for (Context pane : dueContexts) {
            // A context stays in the set of an old timestamp when its trigger registers a new
            // timer, so re-check against the timer the context currently holds.
            // NOTE(review): a context whose timer was already consumed holds -1 here, which
            // also satisfies this comparison for watermarks >= -1 - confirm that is intended.
            if (pane.watermarkTimer <= watermark) {
                processTriggerResult(pane.onEventTime(pane.watermarkTimer), pane.window);
            }
        }

        for (Long expired : expiredTimes) {
            watermarkTimers.remove(expired);
        }

        output.emitWatermark(mark);
        this.currentWatermark = watermark;
    }

    /**
     * Callback for processing-time timers registered through
     * {@code getRuntimeContext().registerTimer(...)}. Fires every registered processing-time
     * timer whose timestamp is less than or equal to {@code time} and removes those timestamps
     * from the timer index.
     *
     * <p>The due timers are copied into a snapshot before any trigger is invoked. A trigger's
     * {@code onProcessingTime} may call {@code registerProcessingTimeTimer}, which adds entries
     * to {@code processingTimeTimers} (and to the per-timestamp sets). Doing that while
     * iterating over the live map would raise a {@code ConcurrentModificationException}.
     *
     * @param time the processing time at which the callback was invoked
     * @throws Exception if a trigger or the window function fails
     */
    @Override
    public final void trigger(long time) throws Exception {
        // snapshot of (timestamp -> contexts) for all timers that are due
        Map<Long, Set<Context>> dueTimers = new HashMap<>();
        for (Map.Entry<Long, Set<Context>> entry : processingTimeTimers.entrySet()) {
            if (entry.getKey() <= time) {
                dueTimers.put(entry.getKey(), new HashSet<Context>(entry.getValue()));
            }
        }

        for (Map.Entry<Long, Set<Context>> due : dueTimers.entrySet()) {
            long actualTime = due.getKey();
            for (Context context : due.getValue()) {
                // the context ignores the call unless actualTime is still its current timer
                Trigger.TriggerResult triggerResult = context.onProcessingTime(actualTime);
                processTriggerResult(triggerResult, context.window);
            }
        }

        for (Long dueTime : dueTimers.keySet()) {
            processingTimeTimers.remove(dueTime);
        }
    }

    /**
     * The {@code Context} is responsible for keeping track of the state of one pane.
     *
     * <p>
     * A pane is the bucket of elements that were assigned to the same {@link Window}; this
     * operator is non-keyed, so panes are not additionally partitioned by key. An element can
     * be in multiple panes if it was assigned to multiple windows by the
     * {@link org.apache.flink.streaming.api.windowing.assigners.WindowAssigner}. Each pane has
     * its own {@code Context}, which holds the timers and the trigger state of that pane and is
     * handed to the operator's {@code Trigger} as its {@code TriggerContext}.
     */
    protected class Context implements Trigger.TriggerContext {
        protected W window;

        protected WindowBuffer<IN> windowBuffer;

        protected HashMap<String, Serializable> state;

        // use these to only allow one timer in flight at a time of each type
        // if the trigger registers another timer this value here will be overwritten,
        // the timer is not removed from the set of in-flight timers to improve performance.
        // When a trigger fires it is just checked against the last timer that was set.
        protected long watermarkTimer;
        protected long processingTimeTimer;

        /**
         * Creates the context of a pane for the given window and buffer, with empty trigger
         * state and no timers set.
         *
         * @param window the window this pane belongs to
         * @param windowBuffer the buffer that holds the pane's elements
         */
        public Context(W window, WindowBuffer<IN> windowBuffer) {
            this.window = window;
            this.windowBuffer = windowBuffer;
            this.state = new HashMap<>();

            // -1 means "no timer currently set"
            this.processingTimeTimer = -1;
            this.watermarkTimer = -1;
        }

        /**
         * Restores a pane from checkpointed data. The layout read here mirrors
         * {@code writeToState}: window, event-time timer, processing-time timer, the
         * length-prefixed serialized trigger state, and the element count followed by the
         * buffered elements.
         *
         * @param in the view positioned at the start of this pane's data
         * @throws Exception if reading or deserializing fails
         */
        @SuppressWarnings("unchecked")
        protected Context(DataInputView in) throws Exception {
            this.window = windowSerializer.deserialize(in);
            this.watermarkTimer = in.readLong();
            this.processingTimeTimer = in.readLong();

            int stateSize = in.readInt();
            byte[] stateData = new byte[stateSize];
            // readFully instead of read: read() is allowed to return after filling only part of
            // the array, which would leave the state truncated and the stream misaligned for
            // everything that follows.
            in.readFully(stateData);
            ByteArrayInputStream bais = new ByteArrayInputStream(stateData);
            state = (HashMap<String, Serializable>) SerializationUtils.deserialize(bais);

            this.windowBuffer = windowBufferFactory.create();
            int numElements = in.readInt();
            MultiplexingStreamRecordSerializer<IN> recordSerializer = new MultiplexingStreamRecordSerializer<>(
                    inputSerializer);
            for (int i = 0; i < numElements; i++) {
                windowBuffer.storeElement(recordSerializer.deserialize(in).<IN>asRecord());
            }
        }

        /**
         * Writes this pane to the checkpoint stream: window, event-time timer, processing-time
         * timer, the length-prefixed serialized trigger state, and the element count followed
         * by the buffered elements.
         *
         * @param out the checkpoint output view to write to
         * @throws IOException if writing fails
         */
        protected void writeToState(StateBackend.CheckpointStateOutputView out) throws IOException {
            windowSerializer.serialize(window, out);
            out.writeLong(watermarkTimer);
            out.writeLong(processingTimeTimer);

            // trigger state goes through Java serialization and is written with a length prefix
            ByteArrayOutputStream stateStream = new ByteArrayOutputStream();
            SerializationUtils.serialize(state, stateStream);
            byte[] stateBytes = stateStream.toByteArray();
            out.writeInt(stateBytes.length);
            out.write(stateBytes, 0, stateBytes.length);

            out.writeInt(windowBuffer.size());
            MultiplexingStreamRecordSerializer<IN> elementSerializer =
                    new MultiplexingStreamRecordSerializer<>(inputSerializer);
            for (StreamRecord<IN> buffered : windowBuffer.getElements()) {
                elementSerializer.serialize(buffered, out);
            }
        }

        /**
         * Returns a handle to the named entry of this pane's trigger state.
         *
         * @param name the key under which the value is kept in this pane's state map
         * @param defaultState the value that is stored and returned when the entry is read
         *                     while it is absent (or {@code null})
         * @return a state handle whose reads and updates go to this pane's state map
         */
        @SuppressWarnings("unchecked")
        public <S extends Serializable> OperatorState<S> getKeyValueState(final String name, final S defaultState) {
            return new OperatorState<S>() {
                @Override
                public S value() throws IOException {
                    Serializable stored = state.get(name);
                    if (stored != null) {
                        return (S) stored;
                    }
                    // first read: remember the default so that it is part of the pane's state
                    state.put(name, defaultState);
                    return defaultState;
                }

                @Override
                public void update(S value) throws IOException {
                    state.put(name, value);
                }
            };
        }

        /**
         * Sets this pane's processing-time timer to the given time. A previously set timer is
         * overwritten in the context but not removed from the operator's timer index.
         *
         * @param time the processing time at which the trigger should be called
         */
        @Override
        public void registerProcessingTimeTimer(long time) {
            if (time == this.processingTimeTimer) {
                // we already have set a trigger for that time
                return;
            }

            Set<Context> waiting = processingTimeTimers.get(time);
            if (waiting == null) {
                // first pane waiting for this timestamp: ask the runtime for a callback
                getRuntimeContext().registerTimer(time, NonKeyedWindowOperator.this);
                waiting = new HashSet<>();
                processingTimeTimers.put(time, waiting);
            }
            waiting.add(this);
            this.processingTimeTimer = time;
        }

        /**
         * Sets this pane's event-time timer to the given time. A previously set timer is
         * overwritten in the context but not removed from the operator's timer index.
         *
         * @param time the event time (watermark) at which the trigger should be called
         */
        @Override
        public void registerEventTimeTimer(long time) {
            if (time == this.watermarkTimer) {
                // we already have set a trigger for that time
                return;
            }

            Set<Context> waiting = watermarkTimers.get(time);
            if (waiting == null) {
                waiting = new HashSet<>();
                watermarkTimers.put(time, waiting);
            }
            waiting.add(this);
            this.watermarkTimer = time;
        }

        /**
         * Passes the element to the trigger. If an event-time timer is set afterwards and lies
         * at or before the current watermark, the timer is fired right away and both results
         * are merged.
         *
         * @param element the element that was added to this pane
         * @return the trigger's decision, merged with the timer result if one fired
         * @throws Exception if the trigger fails
         */
        public Trigger.TriggerResult onElement(StreamRecord<IN> element) throws Exception {
            Trigger.TriggerResult elementResult = trigger.onElement(element.getValue(), element.getTimestamp(),
                    window, this);

            boolean timerAlreadyDue = watermarkTimer > 0 && watermarkTimer <= currentWatermark;
            if (!timerAlreadyDue) {
                return elementResult;
            }

            // fire now and don't wait for the next watermark update
            return Trigger.TriggerResult.merge(elementResult, onEventTime(watermarkTimer));
        }

        /**
         * Fires the processing-time timer if {@code time} is the timer this pane currently
         * holds; calls for any other time are ignored.
         *
         * @param time the timestamp of the timer that went off
         * @return the trigger's decision, or {@code CONTINUE} if the timer is not the current one
         * @throws Exception if the trigger fails
         */
        public Trigger.TriggerResult onProcessingTime(long time) throws Exception {
            if (time != processingTimeTimer) {
                return Trigger.TriggerResult.CONTINUE;
            }
            // clear the timer before calling the trigger, which may set a new one
            processingTimeTimer = -1;
            return trigger.onProcessingTime(time, window, this);
        }

        /**
         * Fires the event-time timer if {@code time} is the timer this pane currently holds;
         * calls for any other time are ignored. If the trigger sets a new timer that already
         * lies at or before the current watermark, that timer is fired immediately as well and
         * the results are merged.
         *
         * @param time the timestamp of the timer that went off
         * @return the (possibly merged) trigger decision, or {@code CONTINUE} if the timer is
         *         not the current one
         * @throws Exception if the trigger fails
         */
        public Trigger.TriggerResult onEventTime(long time) throws Exception {
            if (time != watermarkTimer) {
                return Trigger.TriggerResult.CONTINUE;
            }

            // clear the timer before calling the trigger, which may set a new one
            watermarkTimer = -1;
            Trigger.TriggerResult result = trigger.onEventTime(time, window, this);

            boolean newTimerAlreadyDue = watermarkTimer > 0 && watermarkTimer <= currentWatermark;
            if (!newTimerAlreadyDue) {
                return result;
            }

            // fire now and don't wait for the next watermark update
            return Trigger.TriggerResult.merge(result, onEventTime(watermarkTimer));
        }
    }

    /**
     * When this flag is enabled the current processing time is set as the timestamp of elements
     * upon arrival. This must be used, for example, when using the
     * {@link org.apache.flink.streaming.api.windowing.evictors.TimeEvictor} with processing
     * time semantics.
     *
     * @param setProcessingTime {@code true} to overwrite element timestamps with the current
     *                          system time on arrival, {@code false} to leave them untouched
     * @return this operator, to allow call chaining
     */
    public NonKeyedWindowOperator<IN, OUT, W> enableSetProcessingTime(boolean setProcessingTime) {
        this.setProcessingTime = setProcessingTime;
        return this;
    }

    /**
     * Forwards the output type information to the user function, if the function implements
     * {@code OutputTypeConfigurable}; otherwise does nothing.
     *
     * @param outTypeInfo type information of the emitted elements
     * @param executionConfig the execution configuration to pass along
     */
    @Override
    public final void setOutputType(TypeInformation<OUT> outTypeInfo, ExecutionConfig executionConfig) {
        if (!(userFunction instanceof OutputTypeConfigurable)) {
            return;
        }
        @SuppressWarnings("unchecked")
        OutputTypeConfigurable<OUT> configurable = (OutputTypeConfigurable<OUT>) userFunction;
        configurable.setOutputType(outTypeInfo, executionConfig);
    }

    // ------------------------------------------------------------------------
    //  Checkpointing
    // ------------------------------------------------------------------------

    /**
     * Snapshots the operator: on top of the superclass state, writes the number of in-flight
     * windows followed by each window context, and attaches the resulting handle as the
     * operator state.
     *
     * @param checkpointId the id of the checkpoint
     * @param timestamp the timestamp of the checkpoint
     * @return the task state including the serialized windows
     * @throws Exception if the superclass snapshot or writing the windows fails
     */
    @Override
    public StreamTaskState snapshotOperatorState(long checkpointId, long timestamp) throws Exception {
        final StreamTaskState snapshot = super.snapshotOperatorState(checkpointId, timestamp);

        // the panes, including their key/value maps, go into this stream
        final StateBackend.CheckpointStateOutputView view =
                getStateBackend().createCheckpointStateOutputView(checkpointId, timestamp);

        view.writeInt(windows.size());
        for (Context pane : windows.values()) {
            pane.writeToState(view);
        }

        snapshot.setOperatorState(view.closeAndGetHandle());
        return snapshot;
    }

    /**
     * Restores the in-flight windows from the given task state. The timer indexes are created
     * empty here; they are filled from the restored contexts in {@link #open()}.
     *
     * @param taskState the state to restore from
     * @throws Exception if the superclass restore or reading the windows fails
     */
    @Override
    public void restoreState(StreamTaskState taskState) throws Exception {
        super.restoreState(taskState);

        @SuppressWarnings("unchecked")
        StateHandle<DataInputView> handle = (StateHandle<DataInputView>) taskState.getOperatorState();
        DataInputView in = handle.getState(getUserCodeClassloader());

        final int numWindows = in.readInt();
        this.windows = new HashMap<>(numWindows);
        this.processingTimeTimers = new HashMap<>();
        this.watermarkTimers = new HashMap<>();

        for (int remaining = numWindows; remaining > 0; remaining--) {
            Context restored = new Context(in);
            windows.put(restored.window, restored);
        }
    }

    // ------------------------------------------------------------------------
    // Getters for testing
    // ------------------------------------------------------------------------

    /**
     * Returns whether element timestamps are overwritten with the current processing time on
     * arrival.
     */
    @VisibleForTesting
    public boolean isSetProcessingTime() {
        return setProcessingTime;
    }

    /**
     * Returns the trigger this operator was constructed with.
     */
    @VisibleForTesting
    public Trigger<? super IN, ? super W> getTrigger() {
        return trigger;
    }

    /**
     * Returns the window assigner this operator was constructed with.
     */
    @VisibleForTesting
    public WindowAssigner<? super IN, W> getWindowAssigner() {
        return windowAssigner;
    }

    /**
     * Returns the window buffer factory this operator was constructed with.
     */
    @VisibleForTesting
    public WindowBufferFactory<? super IN, ? extends WindowBuffer<IN>> getWindowBufferFactory() {
        return windowBufferFactory;
    }
}