org.apache.beam.runners.fnexecution.graph.LengthPrefixUnknownCoders.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.runners.fnexecution.graph.LengthPrefixUnknownCoders.java

Source

/*
 * Copyright (C) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.beam.runners.fnexecution.graph;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import java.util.Set;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.Coder;
import org.apache.beam.model.pipeline.v1.RunnerApi.Components;
import org.apache.beam.model.pipeline.v1.RunnerApi.MessageWithComponents;
import org.apache.beam.sdk.coders.ByteArrayCoder;
import org.apache.beam.sdk.coders.LengthPrefixCoder;

/**
 * Utilities for replacing or wrapping unknown coders with {@link LengthPrefixCoder}.
 *
 * <p>A coder is considered "known" when its URN is one of {@code WELL_KNOWN_CODER_URNS}; every
 * other URN is treated as unknown.
 *
 * <p>TODO: Support a dynamic list of well known coders using either registration or manual listing,
 * possibly from ModelCoderRegistrar.
 */
public class LengthPrefixUnknownCoders {
    private static final String BYTES_CODER_TYPE = "beam:coder:bytes:v1";
    private static final String LENGTH_PREFIX_CODER_TYPE = "beam:coder:length_prefix:v1";
    /** URNs of the coders that are traversed rather than wrapped. */
    private static final Set<String> WELL_KNOWN_CODER_URNS = ImmutableSet.of(BYTES_CODER_TYPE, "beam:coder:kv:v1",
            "beam:coder:varint:v1", "beam:coder:interval_window:v1", "beam:coder:iterable:v1",
            LENGTH_PREFIX_CODER_TYPE, "beam:coder:global_window:v1", "beam:coder:windowed_value:v1");

    /**
     * Recursively traverse the coder tree and wrap the first unknown coder in every branch with a
     * {@link LengthPrefixCoder} unless an ancestor coder is itself a {@link LengthPrefixCoder}. If
     * {@code replaceWithByteArrayCoder} is set, then replace that unknown coder with a
     * {@link ByteArrayCoder}.
     *
     * @param coderId The id of the root coder, contained within {@code components}, to start the
     * recursive descent from.
     * @param components Contains the root coder and all component coders.
     * @param replaceWithByteArrayCoder whether to replace an unknown coder with a
     * {@link ByteArrayCoder}.
     * @return A {@link MessageWithComponents} with the
     * {@link MessageWithComponents#getCoder() root coder} and its component coders. Note that no ids
     * that are generated will collide with the ids supplied within the
     * {@link Components#getCodersMap() coder map} key space.
     * @throws IllegalArgumentException if {@code coderId} is not present in {@code components}
     * (raised by {@code getCodersOrThrow}).
     */
    public static RunnerApi.MessageWithComponents forCoder(String coderId, RunnerApi.Components components,
            boolean replaceWithByteArrayCoder) {

        RunnerApi.Coder currentCoder = components.getCodersOrThrow(coderId);
        // Read the URN once; it drives the three-way dispatch below.
        String urn = currentCoder.getSpec().getSpec().getUrn();

        // We handle three cases:
        //  1) the requested coder is already a length prefix coder. In this case we just honor the
        //     request to replace the coder with a byte array coder.
        //  2) the requested coder is a known coder but not a length prefix coder. In this case we
        //     rebuild the coder by recursively length prefixing any unknown component coders.
        //  3) the requested coder is an unknown coder. In this case we either wrap the requested coder
        //     with a length prefix coder or replace it with a length prefix byte array coder.
        if (LENGTH_PREFIX_CODER_TYPE.equals(urn)) {
            if (replaceWithByteArrayCoder) {
                return createLengthPrefixByteArrayCoder(coderId, components);
            }

            // Already length prefixed and no replacement requested: hand back the coder untouched
            // together with the full set of supplied components.
            MessageWithComponents.Builder rvalBuilder = MessageWithComponents.newBuilder();
            rvalBuilder.setCoder(currentCoder);
            rvalBuilder.setComponents(components);
            return rvalBuilder.build();
        } else if (WELL_KNOWN_CODER_URNS.contains(urn)) {
            return lengthPrefixUnknownComponentCoders(coderId, components, replaceWithByteArrayCoder);
        } else {
            return lengthPrefixUnknownCoder(coderId, components, replaceWithByteArrayCoder);
        }
    }

    /**
     * Rebuilds a known (non length prefix) coder by running {@link #forCoder} on each of its
     * component coders.
     *
     * <p>A component whose rewritten coder equals the original keeps its id; a component that
     * changed is registered under a freshly generated id of the form
     * {@code <coderId>-length_prefix<N>}. The returned components hold every rewritten component
     * coder plus whatever coders the recursive calls returned.
     *
     * @param coderId id of the known coder to rebuild.
     * @param components the components in which {@code coderId} and its component coders are looked up.
     * @param replaceWithByteArrayCoder forwarded unchanged to the recursive {@link #forCoder} calls.
     * @return the rebuilt coder together with the coders collected for its components.
     */
    private static MessageWithComponents lengthPrefixUnknownComponentCoders(String coderId,
            RunnerApi.Components components, boolean replaceWithByteArrayCoder) {

        MessageWithComponents.Builder rvalBuilder = MessageWithComponents.newBuilder();
        RunnerApi.Coder currentCoder = components.getCodersOrThrow(coderId);
        RunnerApi.Coder.Builder updatedCoder = currentCoder.toBuilder();
        // Rebuild the component coder ids to handle if any of the component coders changed.
        updatedCoder.clearComponentCoderIds();
        for (final String componentCoderId : currentCoder.getComponentCoderIdsList()) {
            MessageWithComponents componentCoder = forCoder(componentCoderId, components,
                    replaceWithByteArrayCoder);
            String newComponentCoderId = componentCoderId;
            if (!components.getCodersOrThrow(componentCoderId).equals(componentCoder.getCoder())) {
                // Generate a new id if the component coder changed. The id must be unused both in
                // the supplied components and among the coders accumulated so far in the result.
                newComponentCoderId = generateUniqueId(coderId + "-length_prefix", Sets.union(
                        components.getCodersMap().keySet(), rvalBuilder.getComponents().getCodersMap().keySet()));
            }
            updatedCoder.addComponentCoderIds(newComponentCoderId);
            rvalBuilder.getComponentsBuilder().putCoders(newComponentCoderId, componentCoder.getCoder());
            // Insert all component coders of the component coder.
            rvalBuilder.getComponentsBuilder().putAllCoders(componentCoder.getComponents().getCodersMap());
        }
        rvalBuilder.setCoder(updatedCoder);

        return rvalBuilder.build();
    }

    /**
     * Handles a coder with an unknown URN: wraps it with a length prefix coder or, if requested,
     * replaces it with a length prefixed byte array coder.
     *
     * <p>When wrapping, the returned components contain only the unknown coder itself, stored under
     * its original id; any component coders it references are not copied into the result.
     *
     * @param coderId id of the unknown coder; the caller has already resolved it in {@code components}.
     * @param components the components in which {@code coderId} is looked up.
     * @param replaceWithByteArrayCoder whether to substitute a byte array coder for the unknown coder.
     * @return a length prefix coder whose single component is either {@code coderId} or a newly
     * created byte array coder.
     */
    private static MessageWithComponents lengthPrefixUnknownCoder(String coderId, RunnerApi.Components components,
            boolean replaceWithByteArrayCoder) {
        if (replaceWithByteArrayCoder) {
            return createLengthPrefixByteArrayCoder(coderId, components);
        }

        RunnerApi.Coder currentCoder = components.getCodersOrThrow(coderId);
        MessageWithComponents.Builder rvalBuilder = MessageWithComponents.newBuilder();
        rvalBuilder.getComponentsBuilder().putCoders(coderId, currentCoder);
        rvalBuilder.getCoderBuilder().addComponentCoderIds(coderId).getSpecBuilder()
                .getSpecBuilder().setUrn(LENGTH_PREFIX_CODER_TYPE);
        return rvalBuilder.build();
    }

    /**
     * Creates a length prefix coder whose only component is a newly created byte array coder.
     *
     * <p>The byte array coder is registered under a generated id of the form
     * {@code <coderId>-byte_array<N>} that is not already a key of the supplied coder map. The
     * returned components contain only that byte array coder.
     *
     * @param coderId id used as the prefix of the generated byte array coder id.
     * @param components supplies the set of coder ids that the generated id must avoid.
     * @return the length prefix coder and the byte array coder it references.
     */
    private static MessageWithComponents createLengthPrefixByteArrayCoder(String coderId,
            RunnerApi.Components components) {
        MessageWithComponents.Builder rvalBuilder = MessageWithComponents.newBuilder();

        // The result is still empty here, so only the supplied coder ids can collide.
        String byteArrayCoderId = generateUniqueId(coderId + "-byte_array", components.getCodersMap().keySet());
        Coder.Builder byteArrayCoder = Coder.newBuilder();
        byteArrayCoder.getSpecBuilder().getSpecBuilder().setUrn(BYTES_CODER_TYPE);
        rvalBuilder.getComponentsBuilder().putCoders(byteArrayCoderId, byteArrayCoder.build());
        rvalBuilder.getCoderBuilder().addComponentCoderIds(byteArrayCoderId).getSpecBuilder().getSpecBuilder()
                .setUrn(LENGTH_PREFIX_CODER_TYPE);

        return rvalBuilder.build();
    }

    /**
     * Generates a unique id given a prefix and the set of existing ids.
     *
     * <p>The id is {@code prefix} followed by the smallest non-negative integer, counting up from
     * {@code 0}, for which the combined string is not contained in {@code existingIds}.
     *
     * @param prefix the text every candidate id starts with.
     * @param existingIds ids that must not be returned.
     * @return an id starting with {@code prefix} that is not a member of {@code existingIds}.
     */
    static String generateUniqueId(String prefix, Set<String> existingIds) {
        int suffix = 0;
        String candidate = prefix + suffix;
        while (existingIds.contains(candidate)) {
            suffix += 1;
            candidate = prefix + suffix;
        }
        return candidate;
    }
}