org.apache.beam.runners.direct.SideInputContainer.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.runners.direct.SideInputContainer.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.runners.direct;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.common.base.MoreObjects;
import com.google.common.base.Optional;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.Nullable;
import org.apache.beam.runners.core.ReadyCheckingSideInputReader;
import org.apache.beam.runners.core.SideInputReader;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.WindowingStrategy;

/**
 * An in-process store for the contents of {@link PCollectionView PCollectionViews}, tracked
 * separately for every view and window.
 *
 * <p>The container hands out {@link SideInputReader SideInputReaders} (specifically
 * {@link ReadyCheckingSideInputReader ReadyCheckingSideInputReaders}) restricted to a subset of
 * the contained views, and accepts writes that set or replace the contents of a
 * {@link PCollectionView} in a window.
 */
class SideInputContainer {
    /** Every view this container knows about; readers may only be created for these. */
    private final Collection<PCollectionView<?>> containedViews;
    /** Holder of the current contents of each view in each window; a null value means "not yet set". */
    private final LoadingCache<PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>> viewByWindows;

    /**
     * Build a {@link SideInputContainer} for the provided views.
     *
     * <p>The holders for per-window contents are created lazily by a {@link CacheLoader} that uses
     * the provided context to schedule a callback for each view and window it is asked to load.
     */
    public static SideInputContainer create(final EvaluationContext context,
            Collection<PCollectionView<?>> containedViews) {
        CacheLoader<PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>> loader =
                new CallbackSchedulingLoader(context);
        LoadingCache<PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>> holders =
                CacheBuilder.newBuilder().build(loader);
        return new SideInputContainer(containedViews, holders);
    }

    private SideInputContainer(Collection<PCollectionView<?>> containedViews,
            LoadingCache<PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>> viewByWindows) {
        this.viewByWindows = viewByWindows;
        this.containedViews = ImmutableSet.copyOf(containedViews);
    }

    /**
     * Create a {@link ReadyCheckingSideInputReader} that can read only the views in the provided
     * argument, backed by the contents of this {@link SideInputContainer}.
     *
     * @throws IllegalArgumentException if any requested view is not contained in this container
     */
    public ReadyCheckingSideInputReader createReaderForViews(Collection<PCollectionView<?>> newContainedViews) {
        if (containedViews.containsAll(newContainedViews)) {
            return new SideInputContainerSideInputReader(newContainedViews);
        }
        // At least one requested view is unknown; report exactly which ones.
        Set<PCollectionView<?>> known = ImmutableSet.copyOf(containedViews);
        Set<PCollectionView<?>> requested = ImmutableSet.copyOf(newContainedViews);
        throw new IllegalArgumentException(
                "Can't create a SideInputReader with unknown views " + Sets.difference(requested, known));
    }

    /**
     * Write the provided values to the provided view.
     *
     * <p>The values are grouped by every window they appear in. For each such window the values
     * become the contents of the {@link PCollectionView} if nothing has been stored yet, or if
     * their pane index is greater than the pane index of what is currently stored.
     *
     * <p>Only the pane of the first value in each window is consulted, so the values for a window
     * are expected to belong to a single pane.
     */
    public void write(PCollectionView<?> view, Iterable<? extends WindowedValue<?>> values) {
        for (Map.Entry<BoundedWindow, Collection<WindowedValue<?>>> entry : indexValuesByWindow(values)
                .entrySet()) {
            BoundedWindow window = entry.getKey();
            Collection<WindowedValue<?>> valuesInWindow = entry.getValue();
            updatePCollectionViewWindowValues(view, window, valuesInWindow);
        }
    }

    /**
     * Group the provided values by each {@link BoundedWindow window} they belong to. A value that
     * is in several windows is listed under each of them.
     */
    private Map<BoundedWindow, Collection<WindowedValue<?>>> indexValuesByWindow(
            Iterable<? extends WindowedValue<?>> values) {
        Map<BoundedWindow, Collection<WindowedValue<?>>> index = new HashMap<>();
        for (WindowedValue<?> element : values) {
            for (BoundedWindow window : element.getWindows()) {
                if (!index.containsKey(window)) {
                    index.put(window, new ArrayList<WindowedValue<?>>());
                }
                index.get(window).add(element);
            }
        }
        return index;
    }

    /**
     * Store the specified values as the contents of the {@link PCollectionView} in the
     * {@link BoundedWindow}, unless the {@link PCollectionViewWindow} already holds contents whose
     * pane index is at least as large as that of the new values.
     */
    private void updatePCollectionViewWindowValues(PCollectionView<?> view, BoundedWindow window,
            Collection<WindowedValue<?>> windowValues) {
        PCollectionViewWindow<?> key = PCollectionViewWindow.of(view, window);
        AtomicReference<Iterable<? extends WindowedValue<?>>> holder = viewByWindows.getUnchecked(key);

        // Fast path: nothing was ever stored, so the new values win unconditionally.
        boolean wasUnset = holder.compareAndSet(null, windowValues);
        if (wasUnset) {
            return;
        }

        PaneInfo incomingPane = windowValues.iterator().next().getPane();
        while (true) {
            Iterable<? extends WindowedValue<?>> current = holder.get();
            // Empty stored contents rank below every real pane index.
            long currentPaneIndex = -1L;
            if (!Iterables.isEmpty(current)) {
                currentPaneIndex = current.iterator().next().getPane().getIndex();
            }
            if (incomingPane.getIndex() <= currentPaneIndex) {
                // What is stored is from the same or a later pane; keep it.
                return;
            }
            if (holder.compareAndSet(current, windowValues)) {
                return;
            }
            // Another writer changed the holder in between; re-read and compare again.
        }
    }

    /**
     * Loader for {@code viewByWindows}: creates an initially unset holder for a view and window and
     * schedules a {@link WriteEmptyViewContents} callback for it through the context.
     */
    private static class CallbackSchedulingLoader
            extends CacheLoader<PCollectionViewWindow<?>, AtomicReference<Iterable<? extends WindowedValue<?>>>> {
        private final EvaluationContext context;

        public CallbackSchedulingLoader(EvaluationContext context) {
            this.context = context;
        }

        @Override
        public AtomicReference<Iterable<? extends WindowedValue<?>>> load(PCollectionViewWindow<?> view) {
            PCollectionView<?> targetView = view.getView();
            BoundedWindow targetWindow = view.getWindow();
            WindowingStrategy<?, ?> windowingStrategy = targetView.getWindowingStrategyInternal();

            AtomicReference<Iterable<? extends WindowedValue<?>>> holder = new AtomicReference<>();
            Runnable writeEmpty = new WriteEmptyViewContents(targetView, targetWindow, holder);
            context.scheduleAfterOutputWouldBeProduced(targetView, targetWindow, windowingStrategy, writeEmpty);
            return holder;
        }
    }

    /**
     * Callback that sets the contents of a view and window to an empty list if, at the time it
     * runs, no contents have been set.
     */
    private static class WriteEmptyViewContents implements Runnable {
        private final PCollectionView<?> view;
        private final BoundedWindow window;
        private final AtomicReference<Iterable<? extends WindowedValue<?>>> contents;

        private WriteEmptyViewContents(PCollectionView<?> view, BoundedWindow window,
                AtomicReference<Iterable<? extends WindowedValue<?>>> contents) {
            this.view = view;
            this.window = window;
            this.contents = contents;
        }

        @Override
        public void run() {
            // No elements were produced for the requested window, so record that the view is
            // empty there. This is a no-op when contents have already been written.
            Iterable<? extends WindowedValue<?>> nothing = Collections.<WindowedValue<?>>emptyList();
            contents.compareAndSet(null, nothing);
        }

        @Override
        public String toString() {
            return MoreObjects.toStringHelper(this)
                    .add("view", view)
                    .add("window", window)
                    .toString();
        }
    }

    /**
     * A {@link ReadyCheckingSideInputReader} over a fixed set of views. Lookups go through a
     * reader-local cache populated from the enclosing container by
     * {@link CurrentViewContentsLoader}.
     */
    private final class SideInputContainerSideInputReader implements ReadyCheckingSideInputReader {
        private final Collection<PCollectionView<?>> readerViews;
        private final LoadingCache<PCollectionViewWindow<?>, Optional<? extends Iterable<? extends WindowedValue<?>>>> viewContents;

        private SideInputContainerSideInputReader(Collection<PCollectionView<?>> readerViews) {
            this.viewContents = CacheBuilder.newBuilder().build(new CurrentViewContentsLoader());
            this.readerViews = ImmutableSet.copyOf(readerViews);
        }

        @Override
        public boolean isReady(final PCollectionView<?> view, final BoundedWindow window) {
            checkArgument(readerViews.contains(view),
                    "Tried to check if view %s was ready in a SideInputReader that does not contain it. "
                            + "Contained views; %s",
                    view, readerViews);
            PCollectionViewWindow<?> key = PCollectionViewWindow.of(view, window);
            Optional<? extends Iterable<? extends WindowedValue<?>>> loaded = viewContents.getUnchecked(key);
            return loaded.isPresent();
        }

        @Override
        @Nullable
        public <T> T get(final PCollectionView<T> view, final BoundedWindow window) {
            checkArgument(readerViews.contains(view), "call to get(PCollectionView) with unknown view: %s", view);
            checkArgument(isReady(view, window),
                    "calling get() on PCollectionView %s that is not ready in window %s", view, window);
            PCollectionViewWindow<?> key = PCollectionViewWindow.of(view, window);
            // Safe covariant cast
            @SuppressWarnings("unchecked")
            Iterable<WindowedValue<?>> elements = (Iterable<WindowedValue<?>>) viewContents.getUnchecked(key)
                    .get();
            return view.getViewFn().apply(elements);
        }

        @Override
        public <T> boolean contains(PCollectionView<T> view) {
            return readerViews.contains(view);
        }

        @Override
        public boolean isEmpty() {
            return readerViews.isEmpty();
        }
    }

    /**
     * A {@link CacheLoader} that reads the contents currently held by the container for a
     * {@link PCollectionViewWindow} and wraps them in an optional, which is absent when no contents
     * have been set.
     */
    private class CurrentViewContentsLoader extends
            CacheLoader<PCollectionViewWindow<?>, Optional<? extends Iterable<? extends WindowedValue<?>>>> {

        @Override
        public Optional<? extends Iterable<? extends WindowedValue<?>>> load(PCollectionViewWindow<?> key) {
            AtomicReference<Iterable<? extends WindowedValue<?>>> holder = viewByWindows.getUnchecked(key);
            return Optional.fromNullable(holder.get());
        }
    }
}