org.apache.hyracks.algebricks.rewriter.rules.IntroduceProjectsRule.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hyracks.algebricks.rewriter.rules.IntroduceProjectsRule.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hyracks.algebricks.rewriter.rules;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.utils.Triple;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;

/**
 * Projects away unused variables at the earliest possible point.
 * Does a full DFS sweep of the plan adding ProjectOperators in the bottom-up pass.
 * Also, removes projects that have become useless.
 * <p>
 * The sweep is performed once per rule instance: the first {@link #rewritePre} call processes the whole plan
 * reachable from the operator it is given, and every later call returns {@code false} immediately.
 * <p>
 * TODO: This rule 'recklessly' adds as many projects as possible, but there is no guarantee
 * that the overall cost of the plan is reduced since project operators also add a cost.
 */
public class IntroduceProjectsRule implements IAlgebraicRewriteRule {

    // Scratch collections shared by all (recursive) introduceProjects() calls. Every use clears the collection
    // first, and none of them is read across a recursive call, so sharing them is safe.
    private final Set<LogicalVariable> usedVars = new HashSet<>();
    private final Set<LogicalVariable> liveVars = new HashSet<>();
    private final Set<LogicalVariable> producedVars = new HashSet<>();
    private final List<LogicalVariable> projectVars = new ArrayList<>();
    // Set by the first rewritePre() call so that the plan is swept only once.
    protected boolean hasRun = false;
    // Keep track of used variables after the current operator, including used variables in itself.
    private final Map<AbstractLogicalOperator, HashSet<LogicalVariable>> allUsedVarsAfterOpMap = new HashMap<>();

    /**
     * This rule does all of its work in {@link #rewritePre}; the post-order hook never changes the plan.
     *
     * @return always {@code false}
     */
    @Override
    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
        return false;
    }

    /**
     * Runs the rule over the plan rooted at {@code opRef}, the first time it is called.
     *
     * @param opRef
     *            reference to the operator the sweep starts from
     * @param context
     *            optimization context used to recompute type environments of changed operators
     * @return {@code true} if a project was added or removed; {@code false} if nothing changed or if the rule
     *         has already run
     * @throws AlgebricksException
     *             if collecting variables or recomputing a type environment fails
     */
    @Override
    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
            throws AlgebricksException {
        if (hasRun) {
            return false;
        }
        hasRun = true;

        // Collect all used variables after each operator, including the used variables in itself in the plan.
        // This is necessary since introduceProjects() may generate a wrong project if it doesn't have the
        // information for all paths in the plan: when there are two or more branches it can only deal with one
        // path at a time, so a variable used in one path might be removed while traversing another path.
        collectUsedVars(opRef, new HashSet<LogicalVariable>());

        // Introduce projects. The starting operator has no parent, hence the null / -1 arguments.
        return introduceProjects(null, -1, opRef, Collections.<LogicalVariable>emptySet(), context);
    }

    /**
     * Collect all used variables after each operator, including the used variables in itself in the plan.
     * Collecting information in a separate method is required since there can be multiple paths in the plan
     * and introduceProjects() method can deal with only one path at a time during conducting depth-first-search.
     * The result is accumulated in {@code allUsedVarsAfterOpMap}; an operator that is reached more than once
     * ends up with the union of the variables seen on every visit.
     *
     * @param opRef
     *            reference to the operator to process; its inputs are processed recursively
     * @param parentUsedVars
     *            variables used by the operators visited on the way down to {@code opRef}; not modified
     * @throws AlgebricksException
     *             if the used variables of an operator cannot be determined
     */
    protected void collectUsedVars(Mutable<ILogicalOperator> opRef, Set<LogicalVariable> parentUsedVars)
            throws AlgebricksException {
        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
        HashSet<LogicalVariable> usedVarsPerOp = new HashSet<>();
        VariableUtilities.getUsedVariables(op, usedVarsPerOp);
        usedVarsPerOp.addAll(parentUsedVars);

        // Merge with what an earlier visit recorded for this operator, if any.
        HashSet<LogicalVariable> recordedVars = allUsedVarsAfterOpMap.get(op);
        if (recordedVars == null) {
            allUsedVarsAfterOpMap.put(op, usedVarsPerOp);
        } else {
            recordedVars.addAll(usedVarsPerOp);
        }

        for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
            collectUsedVars(inputOpRef, usedVarsPerOp);
        }
    }

    /**
     * Recursively processes the operator in {@code opRef}: first descends into its inputs, then, on the way back
     * up, either inserts project operators under the inputs that carry variables nobody above needs, or removes
     * the operator itself if it is a project that no longer drops anything.
     *
     * @param parentOp
     *            the operator that has {@code opRef} as an input, or {@code null} for the starting operator
     * @param parentInputIndex
     *            index of {@code opRef} in {@code parentOp}'s inputs; unused when {@code parentOp} is
     *            {@code null}
     * @param opRef
     *            reference to the operator to process
     * @param parentUsedVars
     *            variables used by the operators above {@code opRef} on the current path; not modified
     * @param context
     *            optimization context used to recompute type environments of changed operators
     * @return {@code true} if the plan was changed at or below this operator
     * @throws AlgebricksException
     *             if collecting variables or recomputing a type environment fails
     */
    protected boolean introduceProjects(AbstractLogicalOperator parentOp, int parentInputIndex,
            Mutable<ILogicalOperator> opRef, Set<LogicalVariable> parentUsedVars, IOptimizationContext context)
            throws AlgebricksException {
        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
        boolean modified = false;

        usedVars.clear();
        VariableUtilities.getUsedVariables(op, usedVars);

        // In the top-down pass, maintain a set of variables that are used in op and all its parents.
        // This is a necessary step for the newly created project operator during this optimization,
        // since we already have all information from collectUsedVars() method for the other operators.
        HashSet<LogicalVariable> parentsUsedVars = new HashSet<>();
        parentsUsedVars.addAll(parentUsedVars);
        parentsUsedVars.addAll(usedVars);

        HashSet<LogicalVariable> usedOnAllPaths = allUsedVarsAfterOpMap.get(op);
        if (usedOnAllPaths != null) {
            parentsUsedVars.addAll(usedOnAllPaths);
        }

        // Descend into children.
        for (int i = 0; i < op.getInputs().size(); i++) {
            Mutable<ILogicalOperator> inputOpRef = op.getInputs().get(i);
            if (introduceProjects(op, i, inputOpRef, parentsUsedVars, context)) {
                modified = true;
            }
        }

        if (modified) {
            context.computeAndSetTypeEnvironmentForOperator(op);
        }
        // In the bottom-up pass, determine which live variables are not used by op's parents.
        // Such variables are to be projected away.
        liveVars.clear();
        VariableUtilities.getLiveVariables(op, liveVars);
        producedVars.clear();
        VariableUtilities.getProducedVariables(op, producedVars);
        liveVars.removeAll(producedVars);

        projectVars.clear();
        for (LogicalVariable liveVar : liveVars) {
            if (parentsUsedVars.contains(liveVar)) {
                projectVars.add(liveVar);
            }
        }

        // Some of the variables that are live at this op are not used above.
        if (projectVars.size() != liveVars.size()) {
            // Add a project operator under each of op's qualifying input branches.
            for (int i = 0; i < op.getInputs().size(); i++) {
                ILogicalOperator childOp = op.getInputs().get(i).getValue();
                liveVars.clear();
                VariableUtilities.getLiveVariables(childOp, liveVars);
                List<LogicalVariable> vars = new ArrayList<>();
                vars.addAll(projectVars);
                // Only retain those variables that are live in the i-th input branch.
                vars.retainAll(liveVars);
                if (vars.size() != liveVars.size()) {
                    ProjectOperator projectOp = new ProjectOperator(vars);
                    projectOp.getInputs().add(new MutableObject<ILogicalOperator>(childOp));
                    op.getInputs().get(i).setValue(projectOp);
                    context.computeAndSetTypeEnvironmentForOperator(projectOp);
                    modified = true;
                }
            }
        } else if (parentOp != null && op.getOperatorTag() == LogicalOperatorTag.PROJECT) {
            // Check if the existing project has become useless. A project without a parent (the starting
            // operator of the sweep) is left alone: there is no parent input slot to re-link.
            liveVars.clear();
            VariableUtilities.getLiveVariables(op.getInputs().get(0).getValue(), liveVars);
            ProjectOperator projectOp = (ProjectOperator) op;
            List<LogicalVariable> projectVarsTemp = projectOp.getVariables();
            if (liveVars.size() == projectVarsTemp.size() && liveVars.containsAll(projectVarsTemp)) {
                boolean eliminateProject = true;
                // For UnionAll the variables must also be in exactly the correct order.
                if (parentOp.getOperatorTag() == LogicalOperatorTag.UNIONALL) {
                    eliminateProject = canEliminateProjectBelowUnion((UnionAllOperator) parentOp, projectOp,
                            parentInputIndex);
                }
                if (eliminateProject) {
                    // The existing project has become useless. Remove it by linking the parent directly to
                    // the project's input, and report the change so that the caller recomputes the parent's
                    // type environment and the rule reports that the plan was rewritten.
                    parentOp.getInputs().get(parentInputIndex).setValue(op.getInputs().get(0).getValue());
                    modified = true;
                }
            }
        }

        if (modified) {
            context.computeAndSetTypeEnvironmentForOperator(op);
        }
        return modified;
    }

    /**
     * Checks whether a project directly below a union-all can be dropped without changing the order of the
     * variables the union reads from that branch: the i-th live variable of the project's input must be the
     * variable that the i-th union mapping expects from this branch.
     *
     * @param unionOp
     *            the union-all operator above the project
     * @param projectOp
     *            the candidate project
     * @param unionInputIndex
     *            which union input the project feeds; 0 is matched against the first component of each
     *            mapping triple, any other value against the second
     * @return {@code true} if the live variables of the project's input line up with the union's mappings
     * @throws AlgebricksException
     *             if the live variables of the project's input cannot be determined
     */
    private boolean canEliminateProjectBelowUnion(UnionAllOperator unionOp, ProjectOperator projectOp,
            int unionInputIndex) throws AlgebricksException {
        List<LogicalVariable> orderedLiveVars = new ArrayList<>();
        VariableUtilities.getLiveVariables(projectOp.getInputs().get(0).getValue(), orderedLiveVars);
        int numVars = orderedLiveVars.size();
        // More live variables than mappings cannot line up one-to-one; keep the project rather than
        // reading past the end of the mapping list.
        if (numVars > unionOp.getVariableMappings().size()) {
            return false;
        }
        for (int i = 0; i < numVars; i++) {
            Triple<LogicalVariable, LogicalVariable, LogicalVariable> varTriple = unionOp.getVariableMappings()
                    .get(i);
            LogicalVariable expectedVar = unionInputIndex == 0 ? varTriple.first : varTriple.second;
            // Compare with equals(), consistent with the hash-set based comparisons used elsewhere in this rule.
            if (!orderedLiveVars.get(i).equals(expectedVar)) {
                return false;
            }
        }
        return true;
    }
}