edu.uci.ics.asterix.optimizer.rules.RemoveUnusedOneToOneEquiJoinRule.java Source code

Java tutorial

Introduction

Here is the source code for edu.uci.ics.asterix.optimizer.rules.RemoveUnusedOneToOneEquiJoinRule.java

Source

/*
 * Copyright 2009-2013 by The Regents of the University of California
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * you may obtain a copy of the License from
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package edu.uci.ics.asterix.optimizer.rules;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.mutable.Mutable;

import edu.uci.ics.asterix.metadata.declared.DatasetDataSource;
import edu.uci.ics.asterix.metadata.entities.InternalDatasetDetails;
import edu.uci.ics.asterix.metadata.utils.DatasetUtils;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;

/**
 * Removes join operators for which all of the following conditions are true:
 * 1. The live variables of one input branch of the join are not used in the upstream plan
 * 2. The join is an inner equi join
 * 3. The join condition only uses variables that correspond to primary keys of the same dataset
 * Notice that the last condition implies a 1:1 join, i.e., the join does not change the result cardinality.
 * Joins that satisfy the above conditions may be introduced by other rules
 * which use surrogate optimizations. Such an optimization aims to reduce data copies and communication costs by
 * using the primary keys as surrogates for the desired data items. Typically,
 * such a surrogate-based plan introduces a top-level join to finally resolve
 * the surrogates to the desired data items.
 * In case the upstream plan does not require the original data items at all, such a top-level join is unnecessary.
 * The purpose of this rule is to remove such unnecessary joins.
 * <p>
 * The rule walks the whole plan below the operator it is first invoked on, and it does so only once per
 * rule instance (see {@link #rewritePre}). The instance keeps scratch collections as fields, so a single
 * instance must not be used by several threads at the same time.
 */
public class RemoveUnusedOneToOneEquiJoinRule implements IAlgebraicRewriteRule {

    // Variables used by any operator visited so far during the top-down traversal. It is only ever
    // added to, so it is a superset of the variables used by the ancestors of the current operator.
    private final Set<LogicalVariable> parentsUsedVars = new HashSet<LogicalVariable>();
    // Scratch lists, cleared before each use.
    private final List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
    private final List<LogicalVariable> liveVars = new ArrayList<LogicalVariable>();
    private final List<LogicalVariable> pkVars = new ArrayList<LogicalVariable>();
    private final List<DataSourceScanOperator> dataScans = new ArrayList<DataSourceScanOperator>();
    // Set on the first call to rewritePre; later calls on this instance are no-ops.
    private boolean hasRun = false;

    /**
     * Runs the join removal over the plan rooted at {@code opRef}. Only the first invocation on this
     * rule instance does any work; every later invocation returns {@code false} immediately.
     *
     * @param opRef
     *            reference to the operator at which the traversal starts
     * @param context
     *            the optimization context (not used by this rule)
     * @return {@code true} if at least one join was removed from the plan
     * @throws AlgebricksException
     *             if computing used or live variables fails
     */
    @Override
    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
            throws AlgebricksException {
        if (hasRun) {
            return false;
        }
        hasRun = true;
        return removeUnusedJoin(opRef);
    }

    /**
     * This rule does all of its work in {@link #rewritePre}.
     *
     * @return always {@code false}
     */
    @Override
    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
            throws AlgebricksException {
        return false;
    }

    /**
     * Recursively removes unused 1:1 equi joins from the inputs of the given operator.
     * The variables used by the operator are recorded in {@link #parentsUsedVars} before its inputs
     * are examined, so that a join's branch is only considered unused if no operator visited before
     * the join refers to any of the branch's live variables.
     *
     * @param opRef
     *            reference to the operator whose inputs are examined
     * @return {@code true} if the plan below (and including the inputs of) the operator was modified
     * @throws AlgebricksException
     *             if computing used or live variables fails
     */
    private boolean removeUnusedJoin(Mutable<ILogicalOperator> opRef) throws AlgebricksException {
        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
        boolean modified = false;

        usedVars.clear();
        VariableUtilities.getUsedVariables(op, usedVars);
        // Propagate used variables from parents downwards.
        parentsUsedVars.addAll(usedVars);

        int numInputs = op.getInputs().size();
        for (int i = 0; i < numInputs; i++) {
            Mutable<ILogicalOperator> childOpRef = op.getInputs().get(i);
            // Keep removing joins at input index i for as long as the (new) input is itself a
            // removable join, so that chains of removable joins are eliminated completely.
            int unusedJoinBranchIndex = removeJoinFromInputBranch(childOpRef);
            while (unusedJoinBranchIndex >= 0) {
                int usedBranchIndex = (unusedJoinBranchIndex == 0) ? 1 : 0;
                // Remove the join at input index i, by hooking up op's input i with
                // the join's branch that is still needed.
                AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) childOpRef.getValue();
                childOpRef = joinOp.getInputs().get(usedBranchIndex);
                op.getInputs().set(i, childOpRef);
                modified = true;
                unusedJoinBranchIndex = removeJoinFromInputBranch(childOpRef);
            }
            // Descend into the operator that is now connected at input i. A removed join is not
            // visited, so the variables of its condition do not count as used further down.
            if (removeUnusedJoin(childOpRef)) {
                modified = true;
            }
        }
        return modified;
    }

    /**
     * Decides whether the operator referenced by {@code opRef} is a join that can be removed.
     * Despite its name, this method does not modify the plan; the caller performs the removal.
     *
     * @param opRef
     *            reference to the candidate operator
     * @return the input index of the join branch that is unused (and can be dropped together with the
     *         join), or -1 if the operator is not a removable join
     * @throws AlgebricksException
     *             if computing used or live variables fails
     */
    private int removeJoinFromInputBranch(Mutable<ILogicalOperator> opRef) throws AlgebricksException {
        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
        if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
            return -1;
        }

        AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
        // Make sure the join is an equi-join.
        if (!isEquiJoin(joinOp.getCondition())) {
            return -1;
        }

        // Check whether one of the join branches is unused.
        int unusedJoinBranchIndex = -1;
        for (int i = 0; i < joinOp.getInputs().size(); i++) {
            liveVars.clear();
            VariableUtilities.getLiveVariables(joinOp.getInputs().get(i).getValue(), liveVars);
            liveVars.retainAll(parentsUsedVars);
            if (liveVars.isEmpty()) {
                // None of the live variables from this branch are used by its parents.
                unusedJoinBranchIndex = i;
                break;
            }
        }
        if (unusedJoinBranchIndex < 0) {
            // The variables from both branches are used in the upstream plan. We cannot remove this join.
            return -1;
        }

        // Collect the variables used by the join itself.
        usedVars.clear();
        VariableUtilities.getUsedVariables(joinOp, usedVars);

        // Check whether all used variables originate from primary keys of exactly the same dataset.
        // Collect a list of datascans whose primary key variables are used in the join condition.
        // The list is a reused field, so scans gathered for a previously examined join must be dropped first.
        dataScans.clear();
        gatherProducingDataScans(opRef, usedVars, dataScans);

        // Check that all datascans scan the same dataset, and that the join condition
        // only used primary key variables of those datascans.
        for (int i = 0; i < dataScans.size(); i++) {
            if (i > 0) {
                DatasetDataSource prevAqlDataSource = (DatasetDataSource) dataScans.get(i - 1).getDataSource();
                DatasetDataSource currAqlDataSource = (DatasetDataSource) dataScans.get(i).getDataSource();
                if (!prevAqlDataSource.getDataset().equals(currAqlDataSource.getDataset())) {
                    return -1;
                }
            }
            // Remove from the used variables all the primary key vars of this dataset.
            fillPKVars(dataScans.get(i), pkVars);
            usedVars.removeAll(pkVars);
        }
        if (!usedVars.isEmpty()) {
            // The join condition also uses some other variables that are not primary
            // keys from datasource scans of the same dataset.
            return -1;
        }
        return unusedJoinBranchIndex;
    }

    /**
     * Collects, from the sub-plan rooted at {@code opRef}, every datasource scan over a
     * {@link DatasetDataSource} whose primary key variables are all contained in {@code joinUsedVars}.
     * Scans over other kinds of data sources are skipped. Matching scans are appended to
     * {@code dataScans}; the list is not cleared by this method.
     *
     * @param opRef
     *            reference to the root of the sub-plan to search
     * @param joinUsedVars
     *            the variables used by the join under consideration
     * @param dataScans
     *            output list receiving the matching scans
     */
    private void gatherProducingDataScans(Mutable<ILogicalOperator> opRef, List<LogicalVariable> joinUsedVars,
            List<DataSourceScanOperator> dataScans) {
        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
        if (op.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
            for (Mutable<ILogicalOperator> inputOp : op.getInputs()) {
                gatherProducingDataScans(inputOp, joinUsedVars, dataScans);
            }
            return;
        }
        DataSourceScanOperator dataScan = (DataSourceScanOperator) op;
        if (!(dataScan.getDataSource() instanceof DatasetDataSource)) {
            // Not a dataset scan: it has no dataset primary keys, and callers cast the
            // data source of every gathered scan to DatasetDataSource.
            return;
        }
        fillPKVars(dataScan, pkVars);
        // Check if join uses all PK vars.
        if (joinUsedVars.containsAll(pkVars)) {
            dataScans.add(dataScan);
        }
    }

    /**
     * Replaces the contents of {@code pkVars} with the primary key variables produced by the given scan.
     * The list is left empty if the scan's data source is not a {@link DatasetDataSource} or if its
     * dataset details are not {@link InternalDatasetDetails}. Otherwise the first {@code n} variables of
     * the scan are taken, where {@code n} is the number of partitioning keys of the dataset.
     *
     * @param dataScan
     *            the scan whose primary key variables are requested
     * @param pkVars
     *            output list; cleared and then filled
     */
    private void fillPKVars(DataSourceScanOperator dataScan, List<LogicalVariable> pkVars) {
        pkVars.clear();
        if (!(dataScan.getDataSource() instanceof DatasetDataSource)) {
            return;
        }
        DatasetDataSource datasetDataSource = (DatasetDataSource) dataScan.getDataSource();
        if (datasetDataSource.getDataset().getDatasetDetails() instanceof InternalDatasetDetails) {
            int numPKs = DatasetUtils.getPartitioningKeys(datasetDataSource.getDataset()).size();
            for (int i = 0; i < numPKs; i++) {
                pkVars.add(dataScan.getVariables().get(i));
            }
        }
    }

    /**
     * Tells whether the given join condition is built from equality comparisons only, i.e., it is either
     * a call to {@code EQ} or a non-empty {@code AND} whose arguments all satisfy this check recursively.
     * Any other expression (a different function, a constant, a variable reference) is rejected.
     *
     * @param conditionExpr
     *            reference to the join condition (or to a conjunct of it)
     * @return {@code true} if the condition consists solely of (conjunctions of) equalities
     */
    private boolean isEquiJoin(Mutable<ILogicalExpression> conditionExpr) {
        AbstractLogicalExpression expr = (AbstractLogicalExpression) conditionExpr.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
            // A constant or a variable reference is not an equality predicate.
            return false;
        }
        AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
        FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
        // Compare by value; function identifiers are not guaranteed to be the same object.
        if (AlgebricksBuiltinFunctions.EQ.equals(funcIdent)) {
            return true;
        }
        if (!AlgebricksBuiltinFunctions.AND.equals(funcIdent)) {
            return false;
        }
        if (funcExpr.getArguments().isEmpty()) {
            return false;
        }
        // Every conjunct must itself be an equality (or a conjunction of equalities).
        for (Mutable<ILogicalExpression> conjunct : funcExpr.getArguments()) {
            if (!isEquiJoin(conjunct)) {
                return false;
            }
        }
        return true;
    }
}