org.apache.flink.optimizer.dag.SingleInputNode.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.flink.optimizer.dag.SingleInputNode.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.optimizer.dag;

import static org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport.FOUND_SOURCE;
import static org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport.FOUND_SOURCE_AND_DAM;
import static org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport.NOT_FOUND;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.common.operators.SemanticProperties;
import org.apache.flink.api.common.operators.SingleInputOperator;
import org.apache.flink.api.common.operators.util.FieldSet;
import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.CostEstimator;
import org.apache.flink.optimizer.dataproperties.GlobalProperties;
import org.apache.flink.optimizer.dataproperties.InterestingProperties;
import org.apache.flink.optimizer.dataproperties.LocalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;
import org.apache.flink.optimizer.operators.OperatorDescriptorSingle;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.NamedChannel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.SingleInputPlanNode;
import org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport;
import org.apache.flink.optimizer.util.NoOpUnaryUdfOp;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.io.network.DataExchangeMode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.apache.flink.util.Visitor;

import com.google.common.collect.Sets;

/**
 * A node in the optimizer's program representation for an operation with a single input.
 * 
 * This class contains all the generic logic for handling branching flows, as well as to
 * enumerate candidate execution plans. The subclasses for specific operators simply add logic
 * for cost estimates and specify possible strategies for their execution.
 */
public abstract class SingleInputNode extends OptimizerNode {

    /** The set of key fields of this node; may be {@code null} (e.g. when the operator reports no key columns). */
    protected final FieldSet keys;

    /** The connection through which this node receives its input; {@code null} until it is set. */
    protected DagConnection inConn;

    // --------------------------------------------------------------------------------------------

    /**
     * Creates a single-input optimizer node for the given program operator.
     *
     * <p>The key fields are read from the operator's key columns for input 0. When the operator
     * reports no key columns ({@code null} or an empty array), {@code keys} is set to {@code null}.
     *
     * @param programOperator The program operator that this node represents.
     */
    protected SingleInputNode(SingleInputOperator<?, ?, ?> programOperator) {
        super(programOperator);

        final int[] keyColumns = programOperator.getKeyColumns(0);
        if (keyColumns != null && keyColumns.length > 0) {
            this.keys = new FieldSet(keyColumns);
        } else {
            this.keys = null;
        }
    }

    /**
     * Creates a node that is backed by the shared {@code NoOpUnaryUdfOp} instance instead of a
     * user-defined operator and that uses the given key fields.
     *
     * @param keys The key fields of this node; the reference is stored as given (no copy, no null check).
     */
    protected SingleInputNode(FieldSet keys) {
        super(NoOpUnaryUdfOp.INSTANCE);
        this.keys = keys;
    }

    /**
     * Creates a node that is backed by the shared {@code NoOpUnaryUdfOp} instance and has no key
     * fields ({@code keys} is {@code null}).
     */
    protected SingleInputNode() {
        // the cast selects the FieldSet constructor among the single-argument overloads
        this((FieldSet) null);
    }

    /**
     * Creates a node from another single-input node. The node to copy is handed to the superclass
     * constructor, and its key field set is shared with the new node (the reference is copied,
     * not the set). The input connection is not copied by this constructor.
     *
     * @param toCopy The node to copy.
     */
    protected SingleInputNode(SingleInputNode toCopy) {
        super(toCopy);

        this.keys = toCopy.keys;
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Gets the operator represented by this node, narrowed to the single-input operator type.
     *
     * @return The operator of the superclass, cast to {@code SingleInputOperator}.
     */
    @Override
    public SingleInputOperator<?, ?, ?> getOperator() {
        final SingleInputOperator<?, ?, ?> singleInputOperator = (SingleInputOperator<?, ?, ?>) super.getOperator();
        return singleInputOperator;
    }

    /**
     * Gets the connection through which this node receives its input.
     *
     * @return The input connection, or {@code null} if none has been set.
     */
    public DagConnection getIncomingConnection() {
        return inConn;
    }

    /**
     * Sets the connection through which this node receives its input.
     *
     * <p>This method only stores the reference; it does not register the connection as an
     * outgoing connection of the source node.
     *
     * @param inConn The input connection to set.
     */
    public void setIncomingConnection(DagConnection inConn) {
        this.inConn = inConn;
    }

    /**
     * Gets the node at the source end of this node's input connection.
     *
     * @return The predecessor of this node, or {@code null} if no input connection has been set.
     */
    public OptimizerNode getPredecessorNode() {
        return this.inConn == null ? null : this.inConn.getSource();
    }

    /**
     * Gets the input connection of this node wrapped in an immutable single-element list.
     *
     * @return A singleton list holding the input connection (the element is {@code null} if no
     *         connection has been set).
     */
    @Override
    public List<DagConnection> getIncomingConnections() {
        final DagConnection onlyInput = this.inConn;
        return Collections.singletonList(onlyInput);
    }

    /**
     * Gets the semantic properties of the operator that this node represents.
     *
     * @return The semantic properties reported by the operator.
     */
    @Override
    public SemanticProperties getSemanticProperties() {
        final SingleInputOperator<?, ?, ?> operator = getOperator();
        return operator.getSemanticProperties();
    }

    /**
     * Gets the semantic properties that are used to filter the local properties of candidate plans.
     * This implementation returns the node's regular semantic properties.
     *
     * @return The semantic properties used for local property filtering.
     */
    protected SemanticProperties getSemanticPropertiesForLocalPropertyFiltering() {
        return getSemanticProperties();
    }

    /**
     * Gets the semantic properties that are used to filter the global properties of candidate plans.
     * This implementation returns the node's regular semantic properties.
     *
     * @return The semantic properties used for global property filtering.
     */
    protected SemanticProperties getSemanticPropertiesForGlobalPropertyFiltering() {
        return getSemanticProperties();
    }

    /**
     * Connects this node to the optimizer node of its input operator.
     *
     * <p>The method reads the optional ship strategy hint from the operator's parameters, looks up
     * the optimizer node of the operator's input in {@code contractToNode}, creates the connection
     * between the two nodes, and registers it as this node's incoming connection and as an outgoing
     * connection of the predecessor. All checks happen before any state is modified, so a failure
     * leaves this node and the predecessor untouched.
     *
     * @param contractToNode The mapping from program operators to their optimizer nodes.
     * @param defaultExchangeMode The execution mode that is handed to the created connection.
     * @throws CompilerException If the ship strategy hint is not recognized, if the operator has no
     *                           input, or if no optimizer node is registered for the input operator.
     */
    @Override
    public void setInput(Map<Operator<?>, OptimizerNode> contractToNode, ExecutionMode defaultExchangeMode)
            throws CompilerException {
        // see if an internal hint dictates the strategy to use
        final Configuration conf = getOperator().getParameters();
        final String shipStrategy = conf.getString(Optimizer.HINT_SHIP_STRATEGY, null);
        final ShipStrategyType preSet;

        if (shipStrategy != null) {
            if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH)) {
                preSet = ShipStrategyType.PARTITION_HASH;
            } else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_RANGE)) {
                preSet = ShipStrategyType.PARTITION_RANGE;
            } else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_FORWARD)) {
                preSet = ShipStrategyType.FORWARD;
            } else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION)) {
                preSet = ShipStrategyType.PARTITION_RANDOM;
            } else {
                throw new CompilerException("Unrecognized ship strategy hint: " + shipStrategy);
            }
        } else {
            preSet = null;
        }

        // get the operator that produces this node's input
        final Operator<?> inputOperator = getOperator().getInput();
        if (inputOperator == null) {
            throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input.");
        }

        // Map.get returns null for an unmapped operator; fail with a diagnosable error instead of
        // running into a NullPointerException after the node has been partially wired
        final OptimizerNode pred = contractToNode.get(inputOperator);
        if (pred == null) {
            throw new CompilerException("Error: No optimizer node found for the input of '"
                    + getOperator().getName() + "'.");
        }

        final DagConnection conn = new DagConnection(pred, this, defaultExchangeMode);
        if (preSet != null) {
            conn.setShipStrategy(preSet);
        }

        // register the connection on both ends
        setIncomingConnection(conn);
        pred.addOutgoingConnection(conn);
    }

    // --------------------------------------------------------------------------------------------
    //                             Properties and Optimization
    // --------------------------------------------------------------------------------------------

    /**
     * Gets the operator descriptors that this node may be instantiated with.
     *
     * <p>Within this class, the descriptors supply the requested global and local properties that
     * are added to the interesting properties of the input, they are matched against candidate
     * input channels during plan enumeration, and the matching descriptor instantiates the plan node.
     *
     * @return The list of possible operator descriptors.
     */
    protected abstract List<OperatorDescriptorSingle> getPossibleProperties();

    /**
     * Computes the interesting properties for the input connection and the broadcast connections.
     *
     * <p>The input connection receives this node's own interesting properties, filtered by the code
     * annotations for input 0, extended by all global and local properties requested by the possible
     * operator descriptors. Every broadcast connection receives an empty set of interesting properties.
     *
     * @param estimator The cost estimator; not used by this implementation.
     */
    @Override
    public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
        // start from what we inherit and what is preserved by the user code
        final InterestingProperties inputProps = getInterestingProperties().filterByCodeAnnotations(this, 0);

        // contribute everything the possible operator strategies ask for
        for (OperatorDescriptorSingle descriptor : getPossibleProperties()) {

            for (RequestedGlobalProperties requested : descriptor.getPossibleGlobalProperties()) {
                if (requested.getPartitioning().isPartitionedOnKey()) {
                    // among properties with the same partitioning type, do not push down one that
                    // has fewer key fields: drop (at most) one such inherited property
                    for (RequestedGlobalProperties inherited : inputProps.getGlobalProperties()) {
                        final boolean samePartitioning = inherited.getPartitioning() == requested.getPartitioning();
                        if (samePartitioning
                                && requested.getPartitionedFields().isValidSubset(inherited.getPartitionedFields())) {
                            // removing during iteration is safe only because we leave the loop right away
                            inputProps.getGlobalProperties().remove(inherited);
                            break;
                        }
                    }
                }

                inputProps.addGlobalProperties(requested);
            }

            for (RequestedLocalProperties requested : descriptor.getPossibleLocalProperties()) {
                inputProps.addLocalProperties(requested);
            }
        }

        this.inConn.setInterestingProperties(inputProps);

        // broadcast inputs get no interesting properties
        for (DagConnection broadcastConn : getBroadcastConnections()) {
            broadcastConn.setInterestingProperties(new InterestingProperties());
        }
    }

    /**
     * Enumerates the candidate plans for this node and caches the result.
     *
     * <p>For every candidate plan of the predecessor, a channel is created whose ship strategy is
     * either the one already fixed on the input connection, or is parameterized from each
     * interesting global property of the input connection. A channel whose global properties meet
     * one of the global properties requested by the possible operator descriptors is handed to
     * {@link #addLocalCandidates}, which appends plan nodes to the result. All resulting plans are
     * costed and pruned before they are cached and returned.
     *
     * @param estimator The cost estimator used to cost the candidates; it is also passed on when
     *                  requesting the candidate plans of the predecessor and the broadcast inputs.
     * @return The pruned list of candidate plans; later calls return the same cached list.
     * @throws CompilerException If not a single candidate plan could be created.
     */
    @Override
    public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
        // check if we have a cached version
        if (this.cachedPlans != null) {
            return this.cachedPlans;
        }

        // only used to select the error message when no plan could be created
        boolean childrenSkippedDueToReplicatedInput = false;

        // calculate alternative sub-plans for predecessor
        final List<? extends PlanNode> subPlans = getPredecessorNode().getAlternativePlans(estimator);
        final Set<RequestedGlobalProperties> intGlobal = this.inConn.getInterestingProperties()
                .getGlobalProperties();

        // calculate alternative sub-plans for broadcast inputs
        final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
        List<DagConnection> broadcastConnections = getBroadcastConnections();
        List<String> broadcastConnectionNames = getBroadcastConnectionNames();

        // connections and names are accessed by the same index, i.e. the two lists are expected to be parallel
        for (int i = 0; i < broadcastConnections.size(); i++) {
            DagConnection broadcastConnection = broadcastConnections.get(i);
            String broadcastConnectionName = broadcastConnectionNames.get(i);
            List<PlanNode> broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);

            // wrap the plan candidates in named channels
            HashSet<NamedChannel> broadcastChannels = new HashSet<NamedChannel>(broadcastPlanCandidates.size());
            for (PlanNode plan : broadcastPlanCandidates) {
                NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
                DataExchangeMode exMode = DataExchangeMode.select(broadcastConnection.getDataExchangeMode(),
                        ShipStrategyType.BROADCAST, broadcastConnection.isBreakingPipeline());
                c.setShipStrategy(ShipStrategyType.BROADCAST, exMode);
                broadcastChannels.add(c);
            }
            broadcastPlanChannels.add(broadcastChannels);
        }

        // collect the distinct global properties requested by any of the possible operator descriptors
        final RequestedGlobalProperties[] allValidGlobals;
        {
            Set<RequestedGlobalProperties> pairs = new HashSet<RequestedGlobalProperties>();
            for (OperatorDescriptorSingle ods : getPossibleProperties()) {
                pairs.addAll(ods.getPossibleGlobalProperties());
            }
            allValidGlobals = pairs.toArray(new RequestedGlobalProperties[pairs.size()]);
        }
        final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();

        final ExecutionMode executionMode = this.inConn.getDataExchangeMode();

        final int parallelism = getParallelism();
        final int inParallelism = getPredecessorNode().getParallelism();

        final boolean parallelismChange = inParallelism != parallelism;

        final boolean breaksPipeline = this.inConn.isBreakingPipeline();

        // create all candidates
        for (PlanNode child : subPlans) {

            if (child.getGlobalProperties().isFullyReplicated()) {
                // fully replicated input is always locally forwarded if the parallelism is not changed
                if (parallelismChange) {
                    // can not continue with this child
                    childrenSkippedDueToReplicatedInput = true;
                    continue;
                } else {
                    // NOTE(review): this sets the strategy on the shared connection, not on a per-child
                    // channel; presumably it therefore also routes all children processed afterwards
                    // into the "hint fixed the strategy" branch below - confirm that this is intended
                    this.inConn.setShipStrategy(ShipStrategyType.FORWARD);
                }
            }

            if (this.inConn.getShipStrategy() == null) {
                // pick the strategy ourselves
                for (RequestedGlobalProperties igps : intGlobal) {
                    final Channel c = new Channel(child, this.inConn.getMaterializationMode());
                    igps.parameterizeChannel(c, parallelismChange, executionMode, breaksPipeline);

                    // if the parallelism changed, make sure that we cancel out properties, unless the
                    // ship strategy preserves/establishes them even under changing parallelisms
                    if (parallelismChange && !c.getShipStrategy().isNetworkStrategy()) {
                        c.getGlobalProperties().reset();
                    }

                    // check whether we meet any of the accepted properties
                    // we may remove this check, when we do a check to not inherit
                    // requested global properties that are incompatible with all possible
                    // requested properties
                    // only the first matching requested property is used per channel (note the break)
                    for (RequestedGlobalProperties rgps : allValidGlobals) {
                        if (rgps.isMetBy(c.getGlobalProperties())) {
                            c.setRequiredGlobalProps(rgps);
                            addLocalCandidates(c, broadcastPlanChannels, igps, outputPlans, estimator);
                            break;
                        }
                    }
                }
            } else {
                // hint fixed the strategy
                final Channel c = new Channel(child, this.inConn.getMaterializationMode());
                final ShipStrategyType shipStrategy = this.inConn.getShipStrategy();
                final DataExchangeMode exMode = DataExchangeMode.select(executionMode, shipStrategy,
                        breaksPipeline);

                // pass the key fields along with the strategy when this node has keys
                if (this.keys != null) {
                    c.setShipStrategy(shipStrategy, this.keys.toFieldList(), exMode);
                } else {
                    c.setShipStrategy(shipStrategy, exMode);
                }

                if (parallelismChange) {
                    c.adjustGlobalPropertiesForFullParallelismChange();
                }

                // check whether we meet any of the accepted properties
                // NOTE(review): unlike the branch above, the matched properties are not recorded on the
                // channel via setRequiredGlobalProps, and the matched property (not an interesting
                // property) is passed on to addLocalCandidates
                for (RequestedGlobalProperties rgps : allValidGlobals) {
                    if (rgps.isMetBy(c.getGlobalProperties())) {
                        addLocalCandidates(c, broadcastPlanChannels, rgps, outputPlans, estimator);
                        break;
                    }
                }
            }
        }

        if (outputPlans.isEmpty()) {
            if (childrenSkippedDueToReplicatedInput) {
                throw new CompilerException("No plan meeting the requirements could be created @ " + this
                        + ". Most likely reason: Invalid use of replicated input.");
            } else {
                throw new CompilerException("No plan meeting the requirements could be created @ " + this
                        + ". Most likely reason: Too restrictive plan hints.");
            }
        }

        // cost and prune the plans
        for (PlanNode node : outputPlans) {
            estimator.costOperator(node);
        }
        prunePlanAlternatives(outputPlans);
        outputPlans.trimToSize();

        this.cachedPlans = outputPlans;
        return outputPlans;
    }

    /**
     * Creates candidates from the given channel template, one attempt per interesting local
     * property of the input connection.
     *
     * <p>For each interesting local property, the template is cloned and parameterized with that
     * property. The first operator descriptor (in the order of {@link #getPossibleProperties()})
     * that has a requested local property met by the clone is used to instantiate candidates via
     * {@link #instantiateCandidate}; if no descriptor matches, nothing is created for that property.
     *
     * @param template The channel to clone for every attempt; the template itself is not parameterized.
     * @param broadcastPlanChannels The candidate channels of the broadcast inputs, passed through.
     * @param rgps The requested global properties, passed through.
     * @param target The list to which created plan nodes are added.
     * @param estimator The cost estimator, passed through.
     */
    protected void addLocalCandidates(Channel template, List<Set<? extends NamedChannel>> broadcastPlanChannels,
            RequestedGlobalProperties rgps, List<PlanNode> target, CostEstimator estimator) {
        for (RequestedLocalProperties interesting : this.inConn.getInterestingProperties().getLocalProperties()) {
            final Channel candidateChannel = template.clone();
            interesting.parameterizeChannel(candidateChannel);

            // instantiate a candidate for the first requested local property set that is met
            boolean matched = false;
            for (OperatorDescriptorSingle descriptor : getPossibleProperties()) {
                for (RequestedLocalProperties accepted : descriptor.getPossibleLocalProperties()) {
                    if (accepted.isMetBy(candidateChannel.getLocalProperties())) {
                        candidateChannel.setRequiredLocalProps(accepted);
                        instantiateCandidate(descriptor, candidateChannel, broadcastPlanChannels, target,
                                estimator, rgps, interesting);
                        matched = true;
                        break;
                    }
                }
                if (matched) {
                    // stop looking at further descriptors once one candidate set was created
                    break;
                }
            }
        }
    }

    /**
     * Creates plan node candidates for the given operator descriptor and input channel, one per
     * valid combination of broadcast input channels, and appends them to {@code target}.
     *
     * <p>A combination is skipped when one of its broadcast sources is not branch compatible with
     * the source of the input channel or with another broadcast source of the same combination.
     * For every created node, the global and local properties of the input channel are cloned,
     * transformed by the descriptor, filtered by the semantic properties of this node for input 0,
     * and then set on the node.
     *
     * @param dps The operator descriptor that instantiates the plan node.
     * @param in The input channel of the candidate.
     * @param broadcastPlanChannels The candidate channels of each broadcast input; the cartesian
     *                              product of these sets is enumerated.
     * @param target The list to which the created plan nodes are added.
     * @param estimator Not used by this implementation.
     * @param globPropsReq Not used by this implementation.
     * @param locPropsReq Not used by this implementation.
     * @throws CompilerException If the dam tracing reports {@code NOT_FOUND} or an unexpected result.
     */
    protected void instantiateCandidate(OperatorDescriptorSingle dps, Channel in,
            List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator,
            RequestedGlobalProperties globPropsReq, RequestedLocalProperties locPropsReq) {
        final PlanNode inputSource = in.getSource();

        // presumably yields exactly one empty combination when there are no broadcast inputs, so that
        // one candidate is still created - verify against the Guava Sets.cartesianProduct contract
        for (List<NamedChannel> broadcastChannelsCombination : Sets.cartesianProduct(broadcastPlanChannels)) {

            boolean validCombination = true;
            boolean requiresPipelinebreaker = false;

            // check whether the broadcast inputs use the same plan candidate at the branching point
            for (int i = 0; i < broadcastChannelsCombination.size(); i++) {
                NamedChannel nc = broadcastChannelsCombination.get(i);
                PlanNode bcSource = nc.getSource();

                // check branch compatibility against input
                if (!areBranchCompatible(bcSource, inputSource)) {
                    validCombination = false;
                    break;
                }

                // check branch compatibility against all other broadcast variables
                for (int k = 0; k < i; k++) {
                    PlanNode otherBcSource = broadcastChannelsCombination.get(k).getSource();

                    if (!areBranchCompatible(bcSource, otherBcSource)) {
                        validCombination = false;
                        // NOTE(review): this only leaves the inner loop; the dam check below and the
                        // remaining broadcast channels are still processed for the invalid combination
                        break;
                    }
                }

                // check if there is a common predecessor and whether there is a dam on the way to all common predecessors
                // NOTE(review): this check reads only 'in' and 'hereJoinedBranches', not the current
                // broadcast channel, yet it runs once per broadcast channel and not at all when the
                // combination is empty - confirm that this is intended
                if (in.isOnDynamicPath() && this.hereJoinedBranches != null) {
                    for (OptimizerNode brancher : this.hereJoinedBranches) {
                        PlanNode candAtBrancher = in.getSource().getCandidateAtBranchPoint(brancher);

                        if (candAtBrancher == null) {
                            // closed branch between two broadcast variables
                            continue;
                        }

                        SourceAndDamReport res = in.getSource().hasDamOnPathDownTo(candAtBrancher);
                        if (res == NOT_FOUND) {
                            throw new CompilerException("Bug: Tracing dams for deadlock detection is broken.");
                        } else if (res == FOUND_SOURCE) {
                            // the source was found without a dam on the path: a pipeline breaker is requested
                            requiresPipelinebreaker = true;
                            break;
                        } else if (res == FOUND_SOURCE_AND_DAM) {
                            // good
                        } else {
                            throw new CompilerException();
                        }
                    }
                }
            }

            if (!validCombination) {
                continue;
            }

            // NOTE(review): 'in' is shared by all combinations of this call; presumably a temp mode set
            // here stays in effect for the combinations that follow - confirm
            if (requiresPipelinebreaker) {
                in.setTempMode(in.getTempMode().makePipelineBreaker());
            }

            final SingleInputPlanNode node = dps.instantiate(in, this);
            node.setBroadcastInputs(broadcastChannelsCombination);

            // compute how the strategy affects the properties
            GlobalProperties gProps = in.getGlobalProperties().clone();
            LocalProperties lProps = in.getLocalProperties().clone();
            gProps = dps.computeGlobalProperties(gProps);
            lProps = dps.computeLocalProperties(lProps);

            // filter by the user code field copies
            gProps = gProps.filterBySemanticProperties(getSemanticPropertiesForGlobalPropertyFiltering(), 0);
            lProps = lProps.filterBySemanticProperties(getSemanticPropertiesForLocalPropertyFiltering(), 0);

            // apply
            node.initProperties(gProps, lProps);
            node.updatePropertiesWithUniqueSets(getUniqueFields());
            target.add(node);
        }
    }

    // --------------------------------------------------------------------------------------------
    //                                     Branch Handling
    // --------------------------------------------------------------------------------------------

    /**
     * Computes the stack of unclosed branches for this node, unless it has been computed before.
     *
     * <p>The closed branching nodes of the predecessor are added to this node, the open branches
     * that the predecessor reports for the input connection are combined with those of the
     * broadcast inputs, and the result is stored in {@code openBranches}. A {@code null} or empty
     * result is stored as an empty list.
     */
    @Override
    public void computeUnclosedBranchStack() {
        // nothing to do if the branches were computed already
        if (this.openBranches != null) {
            return;
        }

        final OptimizerNode predecessor = getPredecessorNode();
        addClosedBranches(predecessor.closedBranchingNodes);
        final List<UnclosedBranchDescriptor> inputBranches = predecessor.getBranchesForParent(this.inConn);

        // handle the data flow branching for the broadcast inputs
        final List<UnclosedBranchDescriptor> combined = computeUnclosedBranchStackForBroadcastInputs(inputBranches);

        if (combined == null || combined.isEmpty()) {
            this.openBranches = Collections.<UnclosedBranchDescriptor>emptyList();
        } else {
            this.openBranches = combined;
        }
    }

    // --------------------------------------------------------------------------------------------
    //                                     Miscellaneous
    // --------------------------------------------------------------------------------------------

    /**
     * Accepts the visitor and applies it to this node and its inputs.
     *
     * <p>If {@code preVisit} returns {@code true} for this node, the visitor descends into the
     * predecessor first and then into the source of every broadcast connection; finally
     * {@code postVisit} is called for this node. If {@code preVisit} returns {@code false},
     * nothing else happens.
     *
     * @param visitor The visitor to apply.
     * @throws CompilerException If this node has no predecessor.
     */
    @Override
    public void accept(Visitor<OptimizerNode> visitor) {
        if (!visitor.preVisit(this)) {
            return;
        }

        final OptimizerNode predecessor = getPredecessorNode();
        if (predecessor == null) {
            // same diagnostic as in setInput, so that a missing input is reported consistently
            throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input.");
        }
        predecessor.accept(visitor);

        for (DagConnection connection : getBroadcastConnections()) {
            connection.getSource().accept(visitor);
        }
        visitor.postVisit(this);
    }
}