eu.stratosphere.compiler.dag.SingleInputNode.java Source code

Java tutorial

Introduction

Here is the source code for eu.stratosphere.compiler.dag.SingleInputNode.java

Source

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.compiler.dag;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.collect.Sets;

import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.SingleInputOperator;
import eu.stratosphere.api.common.operators.SingleInputSemanticProperties;
import eu.stratosphere.api.common.operators.util.FieldSet;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.PactCompiler;
import eu.stratosphere.compiler.costs.CostEstimator;
import eu.stratosphere.compiler.dataproperties.GlobalProperties;
import eu.stratosphere.compiler.dataproperties.InterestingProperties;
import eu.stratosphere.compiler.dataproperties.LocalProperties;
import eu.stratosphere.compiler.dataproperties.RequestedGlobalProperties;
import eu.stratosphere.compiler.dataproperties.RequestedLocalProperties;
import eu.stratosphere.compiler.operators.OperatorDescriptorSingle;
import eu.stratosphere.compiler.plan.Channel;
import eu.stratosphere.compiler.plan.NamedChannel;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.compiler.plan.SingleInputPlanNode;
import eu.stratosphere.compiler.util.NoOpUnaryUdfOp;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.util.Visitor;

/**
 * A node in the optimizer's program representation for a PACT with a single input.
 * 
 * This class contains all the generic logic for branch handling, interesting properties,
 * and candidate plan enumeration. The subclasses for specific operators simply add logic
 * for cost estimates and specify possible strategies for their realization.
 */
public abstract class SingleInputNode extends OptimizerNode {

    protected final FieldSet keys; // The set of key fields

    protected PactConnection inConn; // the input of the node

    // --------------------------------------------------------------------------------------------

    /**
     * Creates a new node with a single input for the optimizer plan.
     * 
     * @param pactContract The PACT that the node represents.
     */
    protected SingleInputNode(SingleInputOperator<?, ?, ?> pactContract) {
        super(pactContract);

        int[] k = pactContract.getKeyColumns(0);
        this.keys = k == null || k.length == 0 ? null : new FieldSet(k);
    }

    protected SingleInputNode(FieldSet keys) {
        super(NoOpUnaryUdfOp.INSTANCE);
        this.keys = keys;
    }

    protected SingleInputNode() {
        super(NoOpUnaryUdfOp.INSTANCE);
        this.keys = null;
    }

    protected SingleInputNode(SingleInputNode toCopy) {
        super(toCopy);

        this.keys = toCopy.keys;
    }

    // --------------------------------------------------------------------------------------------

    @Override
    public SingleInputOperator<?, ?, ?> getPactContract() {
        return (SingleInputOperator<?, ?, ?>) super.getPactContract();
    }

    /**
     * Gets the input of this operator.
     * 
     * @return The input.
     */
    public PactConnection getIncomingConnection() {
        return this.inConn;
    }

    /**
     * Sets the <tt>PactConnection</tt> through which this node receives its input.
     * 
     * @param inConn The input connection to set.
     */
    public void setIncomingConnection(PactConnection inConn) {
        this.inConn = inConn;
    }

    /**
     * Gets the predecessor of this node.
     * 
     * @return The predecessor of this node. 
     */
    public OptimizerNode getPredecessorNode() {
        if (this.inConn != null) {
            return this.inConn.getSource();
        } else {
            return null;
        }
    }

    @Override
    public List<PactConnection> getIncomingConnections() {
        return Collections.singletonList(this.inConn);
    }

    @Override
    public boolean isFieldConstant(int input, int fieldNumber) {
        if (input != 0) {
            throw new IndexOutOfBoundsException();
        }

        SingleInputOperator<?, ?, ?> c = getPactContract();
        SingleInputSemanticProperties semanticProperties = c.getSemanticProperties();

        if (semanticProperties != null) {
            FieldSet fs;
            if ((fs = semanticProperties.getForwardedField(fieldNumber)) != null) {
                return fs.contains(fieldNumber);
            }
        }

        return false;
    }

    @Override
    public void setInput(Map<Operator<?>, OptimizerNode> contractToNode) throws CompilerException {
        // see if an internal hint dictates the strategy to use
        final Configuration conf = getPactContract().getParameters();
        final String shipStrategy = conf.getString(PactCompiler.HINT_SHIP_STRATEGY, null);
        final ShipStrategyType preSet;

        if (shipStrategy != null) {
            if (shipStrategy.equalsIgnoreCase(PactCompiler.HINT_SHIP_STRATEGY_REPARTITION_HASH)) {
                preSet = ShipStrategyType.PARTITION_HASH;
            } else if (shipStrategy.equalsIgnoreCase(PactCompiler.HINT_SHIP_STRATEGY_REPARTITION_RANGE)) {
                preSet = ShipStrategyType.PARTITION_RANGE;
            } else if (shipStrategy.equalsIgnoreCase(PactCompiler.HINT_SHIP_STRATEGY_FORWARD)) {
                preSet = ShipStrategyType.FORWARD;
            } else if (shipStrategy.equalsIgnoreCase(PactCompiler.HINT_SHIP_STRATEGY_REPARTITION)) {
                preSet = ShipStrategyType.PARTITION_RANDOM;
            } else {
                throw new CompilerException("Unrecognized ship strategy hint: " + shipStrategy);
            }
        } else {
            preSet = null;
        }

        // get the predecessor node
        Operator<?> children = ((SingleInputOperator<?, ?, ?>) getPactContract()).getInput();

        OptimizerNode pred;
        PactConnection conn;
        if (children == null) {
            throw new CompilerException("Error: Node for '" + getPactContract().getName() + "' has no input.");
        } else {
            pred = contractToNode.get(children);
            conn = new PactConnection(pred, this);
            if (preSet != null) {
                conn.setShipStrategy(preSet);
            }
        }

        // create the connection and add it
        setIncomingConnection(conn);
        pred.addOutgoingConnection(conn);
    }

    // --------------------------------------------------------------------------------------------
    //                             Properties and Optimization
    // --------------------------------------------------------------------------------------------

    protected abstract List<OperatorDescriptorSingle> getPossibleProperties();

    @Override
    public boolean isMemoryConsumer() {
        for (OperatorDescriptorSingle dps : getPossibleProperties()) {
            if (dps.getStrategy().firstDam().isMaterializing()) {
                return true;
            }
            for (RequestedLocalProperties rlp : dps.getPossibleLocalProperties()) {
                if (!rlp.isTrivial()) {
                    return true;
                }
            }
        }
        return false;
    }

    @Override
    public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
        // get what we inherit and what is preserved by our user code 
        final InterestingProperties props = getInterestingProperties().filterByCodeAnnotations(this, 0);

        // add all properties relevant to this node
        for (OperatorDescriptorSingle dps : getPossibleProperties()) {
            for (RequestedGlobalProperties gp : dps.getPossibleGlobalProperties()) {
                props.addGlobalProperties(gp);
            }
            for (RequestedLocalProperties lp : dps.getPossibleLocalProperties()) {
                props.addLocalProperties(lp);
            }
        }
        this.inConn.setInterestingProperties(props);

        for (PactConnection conn : getBroadcastConnections()) {
            conn.setInterestingProperties(new InterestingProperties());
        }
    }

    @Override
    public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
        // check if we have a cached version
        if (this.cachedPlans != null) {
            return this.cachedPlans;
        }

        // calculate alternative sub-plans for predecessor
        final List<? extends PlanNode> subPlans = getPredecessorNode().getAlternativePlans(estimator);
        final Set<RequestedGlobalProperties> intGlobal = this.inConn.getInterestingProperties()
                .getGlobalProperties();

        // calculate alternative sub-plans for broadcast inputs
        final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
        List<PactConnection> broadcastConnections = getBroadcastConnections();
        List<String> broadcastConnectionNames = getBroadcastConnectionNames();
        for (int i = 0; i < broadcastConnections.size(); i++) {
            PactConnection broadcastConnection = broadcastConnections.get(i);
            String broadcastConnectionName = broadcastConnectionNames.get(i);
            List<PlanNode> broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);
            // wrap the plan candidates in named channels 
            HashSet<NamedChannel> broadcastChannels = new HashSet<NamedChannel>(broadcastPlanCandidates.size());
            for (PlanNode plan : broadcastPlanCandidates) {
                final NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
                c.setShipStrategy(ShipStrategyType.BROADCAST);
                broadcastChannels.add(c);
            }
            broadcastPlanChannels.add(broadcastChannels);
        }

        final RequestedGlobalProperties[] allValidGlobals;
        {
            Set<RequestedGlobalProperties> pairs = new HashSet<RequestedGlobalProperties>();
            for (OperatorDescriptorSingle ods : getPossibleProperties()) {
                pairs.addAll(ods.getPossibleGlobalProperties());
            }
            allValidGlobals = (RequestedGlobalProperties[]) pairs
                    .toArray(new RequestedGlobalProperties[pairs.size()]);
        }
        final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();

        final int dop = getDegreeOfParallelism();
        final int subPerInstance = getSubtasksPerInstance();
        final int inDop = getPredecessorNode().getDegreeOfParallelism();
        final int inSubPerInstance = getPredecessorNode().getSubtasksPerInstance();
        final int numInstances = dop / subPerInstance + (dop % subPerInstance == 0 ? 0 : 1);
        final int inNumInstances = inDop / inSubPerInstance + (inDop % inSubPerInstance == 0 ? 0 : 1);

        final boolean globalDopChange = numInstances != inNumInstances;
        final boolean localDopChange = numInstances == inNumInstances & subPerInstance != inSubPerInstance;

        // create all candidates
        for (PlanNode child : subPlans) {
            if (this.inConn.getShipStrategy() == null) {
                // pick the strategy ourselves
                for (RequestedGlobalProperties igps : intGlobal) {
                    final Channel c = new Channel(child, this.inConn.getMaterializationMode());
                    igps.parameterizeChannel(c, globalDopChange, localDopChange);

                    // if the DOP changed, make sure that we cancel out properties, unless the
                    // ship strategy preserves/establishes them even under changing DOPs
                    if (globalDopChange && !c.getShipStrategy().isNetworkStrategy()) {
                        c.getGlobalProperties().reset();
                    }
                    if (localDopChange && !(c.getShipStrategy().isNetworkStrategy()
                            || c.getShipStrategy().compensatesForLocalDOPChanges())) {
                        c.getGlobalProperties().reset();
                    }

                    // check whether we meet any of the accepted properties
                    // we may remove this check, when we do a check to not inherit
                    // requested global properties that are incompatible with all possible
                    // requested properties
                    for (RequestedGlobalProperties rgps : allValidGlobals) {
                        if (rgps.isMetBy(c.getGlobalProperties())) {
                            addLocalCandidates(c, broadcastPlanChannels, igps, outputPlans, estimator);
                            break;
                        }
                    }
                }
            } else {
                // hint fixed the strategy
                final Channel c = new Channel(child, this.inConn.getMaterializationMode());
                if (this.keys != null) {
                    c.setShipStrategy(this.inConn.getShipStrategy(), this.keys.toFieldList());
                } else {
                    c.setShipStrategy(this.inConn.getShipStrategy());
                }

                if (globalDopChange) {
                    c.adjustGlobalPropertiesForFullParallelismChange();
                } else if (localDopChange) {
                    c.adjustGlobalPropertiesForLocalParallelismChange();
                }

                // check whether we meet any of the accepted properties
                for (RequestedGlobalProperties rgps : allValidGlobals) {
                    if (rgps.isMetBy(c.getGlobalProperties())) {
                        addLocalCandidates(c, broadcastPlanChannels, rgps, outputPlans, estimator);
                        break;
                    }
                }
            }
        }

        // cost and prune the plans
        for (PlanNode node : outputPlans) {
            estimator.costOperator(node);
        }
        prunePlanAlternatives(outputPlans);
        outputPlans.trimToSize();

        this.cachedPlans = outputPlans;
        return outputPlans;
    }

    protected void addLocalCandidates(Channel template, List<Set<? extends NamedChannel>> broadcastPlanChannels,
            RequestedGlobalProperties rgps, List<PlanNode> target, CostEstimator estimator) {
        final LocalProperties lp = template.getLocalPropertiesAfterShippingOnly();
        for (RequestedLocalProperties ilp : this.inConn.getInterestingProperties().getLocalProperties()) {
            final Channel in = template.clone();
            if (ilp.isMetBy(lp)) {
                in.setLocalStrategy(LocalStrategy.NONE);
            } else {
                ilp.parameterizeChannel(in);
            }

            // instantiate a candidate, if the instantiated local properties meet one possible local property set
            for (OperatorDescriptorSingle dps : getPossibleProperties()) {
                for (RequestedLocalProperties ilps : dps.getPossibleLocalProperties()) {
                    if (ilps.isMetBy(in.getLocalProperties())) {
                        instantiateCandidate(dps, in, broadcastPlanChannels, target, estimator, rgps, ilp);
                        break;
                    }
                }
            }
        }
    }

    protected void instantiateCandidate(OperatorDescriptorSingle dps, Channel in,
            List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator,
            RequestedGlobalProperties globPropsReq, RequestedLocalProperties locPropsReq) {
        final PlanNode inputSource = in.getSource();

        for (List<NamedChannel> broadcastChannelsCombination : Sets.cartesianProduct(broadcastPlanChannels)) {

            boolean validCombination = true;

            // check whether the broadcast inputs use the same plan candidate at the branching point
            for (int i = 0; i < broadcastChannelsCombination.size(); i++) {
                NamedChannel nc = broadcastChannelsCombination.get(i);
                PlanNode bcSource = nc.getSource();

                // check branch compatibility against input
                if (!areBranchCompatible(bcSource, inputSource)) {
                    validCombination = false;
                    break;
                }

                // check branch compatibility against all other broadcast variables
                for (int k = 0; k < i; k++) {
                    PlanNode otherBcSource = broadcastChannelsCombination.get(k).getSource();

                    if (!areBranchCompatible(bcSource, otherBcSource)) {
                        validCombination = false;
                        break;
                    }
                }
            }

            if (!validCombination) {
                continue;
            }

            final SingleInputPlanNode node = dps.instantiate(in, this);
            node.setBroadcastInputs(broadcastChannelsCombination);

            // compute how the strategy affects the properties
            GlobalProperties gProps = in.getGlobalProperties().clone();
            LocalProperties lProps = in.getLocalProperties().clone();
            gProps = dps.computeGlobalProperties(gProps);
            lProps = dps.computeLocalProperties(lProps);

            // filter by the user code field copies
            gProps = gProps.filterByNodesConstantSet(this, 0);
            lProps = lProps.filterByNodesConstantSet(this, 0);

            // apply
            node.initProperties(gProps, lProps);
            node.updatePropertiesWithUniqueSets(getUniqueFields());
            target.add(node);
        }
    }

    // --------------------------------------------------------------------------------------------
    //                                     Branch Handling
    // --------------------------------------------------------------------------------------------

    @Override
    public void computeUnclosedBranchStack() {
        if (this.openBranches != null) {
            return;
        }

        addClosedBranches(getPredecessorNode().closedBranchingNodes);
        List<UnclosedBranchDescriptor> fromInput = getPredecessorNode().getBranchesForParent(this.inConn);

        // handle the data flow branching for the broadcast inputs
        List<UnclosedBranchDescriptor> result = computeUnclosedBranchStackForBroadcastInputs(fromInput);

        this.openBranches = (result == null || result.isEmpty()) ? Collections.<UnclosedBranchDescriptor>emptyList()
                : result;
    }

    // --------------------------------------------------------------------------------------------
    //                                     Miscellaneous
    // --------------------------------------------------------------------------------------------

    @Override
    public void accept(Visitor<OptimizerNode> visitor) {
        if (visitor.preVisit(this)) {
            if (getPredecessorNode() != null) {
                getPredecessorNode().accept(visitor);
            } else {
                throw new CompilerException();
            }
            for (PactConnection connection : getBroadcastConnections()) {
                connection.getSource().accept(visitor);
            }
            visitor.postVisit(this);
        }
    }
}