edu.uci.ics.asterix.optimizer.rules.am.AbstractIntroduceAccessMethodRule.java Source code

Java tutorial

Introduction

Here is the source code for edu.uci.ics.asterix.optimizer.rules.am.AbstractIntroduceAccessMethodRule.java

Source

/*
 * Copyright 2009-2013 by The Regents of the University of California
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * you may obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.uci.ics.asterix.optimizer.rules.am;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.mutable.Mutable;

import edu.uci.ics.asterix.common.config.DatasetConfig.IndexType;
import edu.uci.ics.asterix.dataflow.data.common.AqlExpressionTypeComputer;
import edu.uci.ics.asterix.metadata.api.IMetadataEntity;
import edu.uci.ics.asterix.metadata.bootstrap.MetadataConstants;
import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.utils.DatasetUtils;
import edu.uci.ics.asterix.om.base.AInt32;
import edu.uci.ics.asterix.om.base.AOrderedList;
import edu.uci.ics.asterix.om.base.AString;
import edu.uci.ics.asterix.om.constants.AsterixConstantValue;
import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
import edu.uci.ics.asterix.om.types.ARecordType;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.asterix.om.types.hierachy.ATypeHierarchy;
import edu.uci.ics.asterix.optimizer.rules.am.OptimizableOperatorSubTree.DataSourceType;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;

/**
 * Class that embodies the commonalities between rewrite rules for access
 * methods.
 */
public abstract class AbstractIntroduceAccessMethodRule implements IAlgebraicRewriteRule {

    // Metadata provider taken from the optimization context by
    // setMetadataDeclarations(); fillAllIndexExprs() uses it to look up the
    // indexes of a dataset.
    private AqlMetadataProvider metadataProvider;

    /**
     * Returns the access methods available to the concrete rule, keyed by
     * function identifier. {@code analyzeFunctionExpr} looks up the function
     * identifier of an expression in this map to find the access methods
     * that are relevant for it.
     */
    public abstract Map<FunctionIdentifier, List<IAccessMethod>> getAccessMethods();

    /**
     * Registers the given access method under every function identifier it
     * reports as optimizable, creating the per-function list on first use.
     *
     * @param accessMethod
     *            the access method to register
     * @param accessMethods
     *            the registry to update, keyed by function identifier
     */
    protected static void registerAccessMethod(IAccessMethod accessMethod,
            Map<FunctionIdentifier, List<IAccessMethod>> accessMethods) {
        for (FunctionIdentifier optimizableFunc : accessMethod.getOptimizableFunctions()) {
            List<IAccessMethod> registered = accessMethods.get(optimizableFunc);
            if (registered != null) {
                // A list already exists for this function: just append.
                registered.add(accessMethod);
                continue;
            }
            // First access method for this function: start a new list.
            registered = new ArrayList<IAccessMethod>();
            registered.add(accessMethod);
            accessMethods.put(optimizableFunc, registered);
        }
    }

    /**
     * This base class does no work in {@code rewritePre}: neither argument is
     * inspected and the method always returns {@code false}.
     */
    @Override
    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
        return false;
    }

    /**
     * Caches the metadata provider of the given optimization context in this
     * rule. The provider is cast to {@link AqlMetadataProvider}, so the
     * context must carry a provider of that type.
     *
     * @param context
     *            the optimization context to read the metadata provider from
     */
    protected void setMetadataDeclarations(IOptimizationContext context) {
        metadataProvider = (AqlMetadataProvider) context.getMetadataProvider();
    }

    /**
     * Runs {@link #fillAllIndexExprs} on the given subtree once for every
     * analysis context in {@code analyzedAMs}, i.e. once per access method
     * type that has been analyzed so far.
     *
     * @param subTree
     *            the operator subtree whose variables are matched
     * @param analyzedAMs
     *            analysis contexts keyed by access method
     * @param context
     *            the optimization context
     * @throws AlgebricksException
     *             if filling the index expressions fails
     */
    protected void fillSubTreeIndexExprs(OptimizableOperatorSubTree subTree,
            Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs, IOptimizationContext context)
            throws AlgebricksException {
        // Only the contexts are needed here; the access method keys are not used.
        for (AccessMethodAnalysisContext perAccessMethodCtx : analyzedAMs.values()) {
            // Map variables to applicable indexes for this access method type.
            fillAllIndexExprs(subTree, perAccessMethodCtx, context);
        }
    }

    /**
     * Prunes the index candidates of every analyzed access method and then
     * drops each access method that is left without any candidate index.
     *
     * @param analyzedAMs
     *            analysis contexts keyed by access method; entries may be
     *            removed from this map
     * @throws AlgebricksException
     *             if pruning the candidates of an access method fails
     */
    protected void pruneIndexCandidates(Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs)
            throws AlgebricksException {
        // An explicit iterator is required because entries are removed while iterating.
        for (Iterator<Map.Entry<IAccessMethod, AccessMethodAnalysisContext>> entryIt = analyzedAMs.entrySet()
                .iterator(); entryIt.hasNext();) {
            Map.Entry<IAccessMethod, AccessMethodAnalysisContext> amEntry = entryIt.next();
            AccessMethodAnalysisContext ctx = amEntry.getValue();
            pruneIndexCandidates(amEntry.getKey(), ctx);
            // No applicable index survived for this access method: forget it.
            if (ctx.indexExprsAndVars.isEmpty()) {
                entryIt.remove();
            }
        }
    }

    /**
     * Picks the first candidate index whose index type is compatible with the
     * access method it is registered under. TODO: Improve this decision
     * process by making it more systematic.
     * <p>
     * The compatibility check avoids a case where the chosen access method
     * and the chosen index type differ. Allowed combinations:
     * <ul>
     * <li>{@code BTreeAccessMethod} with {@code IndexType.BTREE}</li>
     * <li>{@code RTreeAccessMethod} with {@code IndexType.RTREE}</li>
     * <li>{@code InvertedIndexAccessMethod} with
     * {@code SINGLE_PARTITION_WORD_INVIX}, {@code SINGLE_PARTITION_NGRAM_INVIX},
     * {@code LENGTH_PARTITIONED_WORD_INVIX} or
     * {@code LENGTH_PARTITIONED_NGRAM_INVIX}</li>
     * </ul>
     * Any other access method instance is accepted with whatever index type
     * its candidate has.
     * <p>
     * All candidate indexes of an access method are examined, in the
     * iteration order of its candidate map, before the next access method is
     * considered. Stopping at the first candidate would discard an access
     * method whose first index is incompatible even though a later one is
     * usable.
     *
     * @param analyzedAMs
     *            analysis contexts keyed by access method
     * @return the chosen access method and index, or {@code null} if no
     *         compatible pair exists
     */
    protected Pair<IAccessMethod, Index> chooseIndex(Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) {
        for (Map.Entry<IAccessMethod, AccessMethodAnalysisContext> amEntry : analyzedAMs.entrySet()) {
            IAccessMethod candidateAccessMethod = amEntry.getKey();
            for (Index candidateIndex : amEntry.getValue().indexExprsAndVars.keySet()) {
                IndexType indexType = candidateIndex.getIndexType();
                boolean isKeywordOrNgramIndex = indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX
                        || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX
                        || indexType == IndexType.SINGLE_PARTITION_WORD_INVIX
                        || indexType == IndexType.SINGLE_PARTITION_NGRAM_INVIX;
                boolean typeMismatch = (candidateAccessMethod == BTreeAccessMethod.INSTANCE
                        && indexType != IndexType.BTREE)
                        || (candidateAccessMethod == RTreeAccessMethod.INSTANCE && indexType != IndexType.RTREE)
                        || (candidateAccessMethod == InvertedIndexAccessMethod.INSTANCE
                                && !isKeywordOrNgramIndex);
                if (typeMismatch) {
                    // Try the next candidate index of the same access method.
                    continue;
                }
                return new Pair<IAccessMethod, Index>(candidateAccessMethod, candidateIndex);
            }
        }
        return null;
    }

    /**
     * Removes irrelevant access method candidates, based on whether the
     * expressions in the query match those in the index. For example, some
     * indexes may require all their expressions to be matched, and some
     * indexes may only require a match on a prefix of fields to be
     * applicable. This method removes all index candidates from
     * {@code analysisCtx.indexExprsAndVars} that are definitely not
     * applicable according to the expressions involved, and records the
     * number of matched keys of every surviving index in
     * {@code analysisCtx.indexNumMatchedKeys}.
     *
     * @param accessMethod
     *            the access method whose candidates are pruned; consulted for
     *            {@code exprIsOptimizable}, {@code matchAllIndexExprs} and
     *            {@code matchPrefixIndexExprs}
     * @param analysisCtx
     *            the analysis context of that access method; modified in place
     * @throws AlgebricksException
     *             if type computation for an expression fails
     */
    public void pruneIndexCandidates(IAccessMethod accessMethod, AccessMethodAnalysisContext analysisCtx)
            throws AlgebricksException {
        Iterator<Map.Entry<Index, List<Pair<Integer, Integer>>>> indexExprAndVarIt = analysisCtx.indexExprsAndVars
                .entrySet().iterator();
        // Used to keep track of matched expressions (added for prefix search)
        int numMatchedKeys = 0;
        ArrayList<Integer> matchedExpressions = new ArrayList<Integer>();
        // One pass per candidate index; the per-index state below is reset on
        // every pass.
        while (indexExprAndVarIt.hasNext()) {
            Map.Entry<Index, List<Pair<Integer, Integer>>> indexExprAndVarEntry = indexExprAndVarIt.next();
            Index index = indexExprAndVarEntry.getKey();
            boolean allUsed = true;
            // Position of the last key field of an unbroken matched prefix; -1
            // means that not even the first key field was matched.
            int lastFieldMatched = -1;
            // NOTE(review): foundKeyField is reset once per index, not once per
            // key field. It is only reassigned when some expression names the
            // current key field, so a 'true' from an earlier key field carries
            // over to a key field that no expression mentions - confirm that
            // this is intended.
            boolean foundKeyField = false;
            matchedExpressions.clear();
            numMatchedKeys = 0;
            // Walk the key fields of the index in declaration order.
            for (int i = 0; i < index.getKeyFieldNames().size(); i++) {
                List<String> keyField = index.getKeyFieldNames().get(i);
                final IAType keyType = index.getKeyFieldTypes().get(i);
                // Each pair is (position in analysisCtx.matchedFuncExprs,
                // variable position inside that function expression).
                Iterator<Pair<Integer, Integer>> exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
                while (exprsAndVarIter.hasNext()) {
                    final Pair<Integer, Integer> exprAndVarIdx = exprsAndVarIter.next();
                    final IOptimizableFuncExpr optFuncExpr = analysisCtx.matchedFuncExprs.get(exprAndVarIdx.first);
                    // If expr is not optimizable by concrete index then remove
                    // expr and continue.
                    if (!accessMethod.exprIsOptimizable(index, optFuncExpr)) {
                        exprsAndVarIter.remove();
                        continue;
                    }
                    boolean typeMatch = true;
                    // Prune indexes based on field types.
                    List<IAType> indexedTypes = new ArrayList<IAType>();
                    // Retrieve the types of the expressions joined/selected with
                    // an indexed field, i.e. of every variable other than the
                    // one at exprAndVarIdx.second.
                    for (int j = 0; j < optFuncExpr.getNumLogicalVars(); j++)
                        if (j != exprAndVarIdx.second)
                            indexedTypes.add(optFuncExpr.getFieldType(j));
                    // Add the type of the first constant in case of a select
                    // (an expression with a single logical variable).
                    if (indexedTypes.size() < 2 && optFuncExpr.getNumLogicalVars() == 1) {
                        indexedTypes.add((IAType) AqlExpressionTypeComputer.INSTANCE
                                .getType(new ConstantExpression(optFuncExpr.getConstantVal(0)), null, null));
                    }
                    // Infer the type of the logical expression based on the
                    // index key type: the ad-hoc type environment below knows
                    // exactly one variable, the source variable of the
                    // expression, and reports keyType for it.
                    indexedTypes.add((IAType) AqlExpressionTypeComputer.INSTANCE.getType(
                            optFuncExpr.getLogicalExpr(exprAndVarIdx.second), null, new IVariableTypeEnvironment() {

                                @Override
                                public Object getVarType(LogicalVariable var) throws AlgebricksException {
                                    if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second)))
                                        return keyType;
                                    // Any other variable is unknown to this environment.
                                    throw new IllegalArgumentException();
                                }

                                @Override
                                public Object getVarType(LogicalVariable var,
                                        List<LogicalVariable> nonNullVariables,
                                        List<List<LogicalVariable>> correlatedNullableVariableLists)
                                        throws AlgebricksException {
                                    // The two list arguments are ignored.
                                    if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second)))
                                        return keyType;
                                    throw new IllegalArgumentException();
                                }

                                @Override
                                public void setVarType(LogicalVariable var, Object type) {
                                    // This environment does not support updates.
                                    throw new IllegalArgumentException();
                                }

                                @Override
                                public Object getType(ILogicalExpression expr) throws AlgebricksException {
                                    // Nested expressions are typed against this same environment.
                                    return AqlExpressionTypeComputer.INSTANCE.getType(expr, null, this);
                                }

                                @Override
                                public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2)
                                        throws AlgebricksException {
                                    // This environment does not support substitution.
                                    throw new IllegalArgumentException();
                                }
                            }));

                    // For the case when jaccard similarity is measured between
                    // ordered & unordered lists.
                    boolean jaccardSimilarity = optFuncExpr.getFuncExpr().getFunctionIdentifier().getName()
                            .startsWith("similarity-jaccard-check");

                    // Every pair of collected types has to match.
                    for (int j = 0; j < indexedTypes.size(); j++)
                        for (int k = j + 1; k < indexedTypes.size(); k++)
                            typeMatch &= isMatched(indexedTypes.get(j), indexedTypes.get(k), jaccardSimilarity);

                    // Check if any field name in the optFuncExpr matches.
                    if (optFuncExpr.findFieldName(keyField) != -1) {
                        // The key field only counts as found when the types
                        // match and the matched subtree has a data source scan.
                        foundKeyField = typeMatch
                                && optFuncExpr.getOperatorSubTree(exprAndVarIdx.second).hasDataSourceScan();
                        if (foundKeyField) {
                            matchedExpressions.add(exprAndVarIdx.first);
                            numMatchedKeys++;
                            // Extend the matched prefix only if it is contiguous.
                            if (lastFieldMatched == i - 1) {
                                lastFieldMatched = i;
                            }
                            // One matching expression per key field is enough.
                            break;
                        }
                    }
                }
                if (!foundKeyField) {
                    allUsed = false;
                    // if any expression was matched, remove the non-matched expressions, otherwise the index is unusable
                    if (lastFieldMatched >= 0) {
                        exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
                        while (exprsAndVarIter.hasNext()) {
                            if (!matchedExpressions.contains(exprsAndVarIter.next().first)) {
                                exprsAndVarIter.remove();
                            }
                        }
                    }
                    // Later key fields are not examined once one is unmatched.
                    break;
                }
            }
            // If the access method requires all exprs to be matched but they
            // are not, remove this candidate.
            if (!allUsed && accessMethod.matchAllIndexExprs()) {
                indexExprAndVarIt.remove();
                continue;
            }
            // A prefix of the index exprs may have been matched.
            if (accessMethod.matchPrefixIndexExprs()) {
                // Remove the candidate if the dataset is a metadata dataset and the index is secondary
                if (index.getDataverseName().equals(MetadataConstants.METADATA_DATAVERSE_NAME)
                        && !index.isPrimaryIndex()) {
                    indexExprAndVarIt.remove();
                    continue;
                }
                // Not even the first key field was matched: no usable prefix.
                if (lastFieldMatched < 0) {
                    indexExprAndVarIt.remove();
                    continue;
                }
            }
            // The index survived pruning: remember how many keys were matched.
            analysisCtx.indexNumMatchedKeys.put(index, new Integer(numMatchedKeys));
        }
    }

    /**
     * Tells whether two types are compatible for index matching. Both types
     * are first reduced to their non-nullable form; they match if their type
     * tags lie in the same type domain or if the first tag can be promoted
     * to the second.
     *
     * @param type1
     *            the first type
     * @param type2
     *            the second type
     * @param useListDomain
     *            forwarded unchanged to {@code ATypeHierarchy.isSameTypeDomain}
     * @return {@code true} if the two types match
     * @throws AlgebricksException
     *             if the non-nullable form of a type cannot be determined
     */
    private boolean isMatched(IAType type1, IAType type2, boolean useListDomain) throws AlgebricksException {
        IAType firstNonNullable = Index.getNonNullableType(type1).first;
        IAType secondNonNullable = Index.getNonNullableType(type2).first;
        return ATypeHierarchy.isSameTypeDomain(firstNonNullable.getTypeTag(), secondNonNullable.getTypeTag(),
                useListDomain)
                || ATypeHierarchy.canPromote(firstNonNullable.getTypeTag(), secondNonNullable.getTypeTag());
    }

    /**
     * Analyzes the given selection condition and fills {@code analyzedAMs}
     * with the access method types that are applicable to it. The metadata is
     * not consulted at this point, so it is not yet known whether an actual
     * index exists.
     * <p>
     * The condition itself and each of its direct function-call arguments
     * are analyzed. A disjunctive (OR) condition is not considered at all.
     *
     * @param cond
     *            the condition; must be a function call expression
     * @param assignsAndUnnests
     *            the assign and unnest operators passed on to the analysis
     * @param analyzedAMs
     *            analysis contexts keyed by access method; updated in place
     * @return {@code true} if at least one applicable access method was found
     */
    protected boolean analyzeCondition(ILogicalExpression cond, List<AbstractLogicalOperator> assignsAndUnnests,
            Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) {
        AbstractFunctionCallExpression topLevelCall = (AbstractFunctionCallExpression) cond;
        // Optimizing a disjunction with an index is too complicated for now.
        if (topLevelCall.getFunctionIdentifier() == AlgebricksBuiltinFunctions.OR) {
            return false;
        }
        boolean anyMatch = analyzeFunctionExpr(topLevelCall, assignsAndUnnests, analyzedAMs);
        for (Mutable<ILogicalExpression> argRef : topLevelCall.getArguments()) {
            ILogicalExpression argument = argRef.getValue();
            if (argument.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                // '|=' always evaluates its right-hand side, so every
                // function-call argument is analyzed even after a match.
                anyMatch |= analyzeFunctionExpr((AbstractFunctionCallExpression) argument, assignsAndUnnests,
                        analyzedAMs);
            }
        }
        return anyMatch;
    }

    /**
     * Determines which access methods apply to the given function expression.
     * Candidates are selected by function identifier and then confirmed by
     * each access method's own analysis of the function's arguments. Every
     * confirmed access method ends up with an analysis context in
     * {@code analyzedAMs}.
     *
     * @param funcExpr
     *            the function expression to analyze
     * @param assignsAndUnnests
     *            the assign and unnest operators passed on to the access
     *            methods
     * @param analyzedAMs
     *            analysis contexts keyed by access method; updated in place
     * @return {@code true} if at least one access method matched; always
     *         {@code false} for a conjunction (AND) or for a function no
     *         access method is registered for
     */
    protected boolean analyzeFunctionExpr(AbstractFunctionCallExpression funcExpr,
            List<AbstractLogicalOperator> assignsAndUnnests,
            Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) {
        FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
        if (funcIdent == AlgebricksBuiltinFunctions.AND) {
            return false;
        }
        // Access methods that are relevant for this function identifier.
        List<IAccessMethod> candidates = getAccessMethods().get(funcIdent);
        if (candidates == null) {
            return false;
        }
        boolean matchedAny = false;
        // Spare context handed to access methods that have none yet. It is
        // replaced only after it has been stored in analyzedAMs.
        AccessMethodAnalysisContext spareCtx = new AccessMethodAnalysisContext();
        for (IAccessMethod candidate : candidates) {
            AccessMethodAnalysisContext knownCtx = analyzedAMs.get(candidate);
            boolean usingSpare = knownCtx == null;
            AccessMethodAnalysisContext ctx = usingSpare ? spareCtx : knownCtx;
            // Let the access method decide whether it is truly applicable.
            if (!candidate.analyzeFuncExprArgs(funcExpr, assignsAndUnnests, ctx)) {
                continue;
            }
            if (usingSpare) {
                // The spare context is now owned by this access method.
                analyzedAMs.put(candidate, ctx);
                spareCtx = new AccessMethodAnalysisContext();
            }
            matchedAny = true;
        }
        return matchedAny;
    }

    /**
     * Finds the indexes in {@code datasetIndexes} whose keys include
     * {@code fieldName} and that have no pending operation. For each of them
     * a mapping from the index to the given optimizable function expression
     * is added to {@code analysisCtx}.
     * <p>
     * If the field type recorded in the function expression for
     * {@code varIdx} is {@code ANULL} or {@code ANY}, it is replaced by the
     * type the index declares for that key field.
     *
     * @param datasetIndexes
     *            the indexes to search
     * @param fieldName
     *            the field name to look for among the index keys
     * @param fieldType
     *            not used by this method
     * @param optFuncExpr
     *            the optimizable function expression being matched
     * @param matchedFuncExprIndex
     *            position of {@code optFuncExpr} among the matched function
     *            expressions
     * @param varIdx
     *            position of the matched variable inside {@code optFuncExpr}
     * @param matchedSubTree
     *            the subtree whose dataset the candidate indexes belong to
     * @param analysisCtx
     *            the analysis context that receives the candidates
     * @return true if at least one candidate index was added, false otherwise
     * @throws AlgebricksException
     */
    protected boolean fillIndexExprs(List<Index> datasetIndexes, List<String> fieldName, IAType fieldType,
            IOptimizableFuncExpr optFuncExpr, int matchedFuncExprIndex, int varIdx,
            OptimizableOperatorSubTree matchedSubTree, AccessMethodAnalysisContext analysisCtx)
            throws AlgebricksException {
        boolean candidateAdded = false;
        for (Index index : datasetIndexes) {
            // Skip indexes that do not cover the field.
            if (!index.getKeyFieldNames().contains(fieldName)) {
                continue;
            }
            // Skip indexes that still have a pending operation.
            if (index.getPendingOp() != IMetadataEntity.PENDING_NO_OP) {
                continue;
            }
            candidateAdded = true;
            IAType recordedType = optFuncExpr.getFieldType(varIdx);
            if (recordedType == BuiltinType.ANULL || recordedType == BuiltinType.ANY) {
                // Fall back to the key type declared by the index.
                int keyPosition = index.getKeyFieldNames().indexOf(fieldName);
                optFuncExpr.setFieldType(varIdx, index.getKeyFieldTypes().get(keyPosition));
            }
            analysisCtx.addIndexExpr(matchedSubTree.dataset, index, matchedFuncExprIndex, varIdx);
        }
        return candidateAdded;
    }

    /**
     * For every optimizable function expression in
     * {@code analysisCtx.matchedFuncExprs}, tries to match its variables
     * against the variables produced by the assign/unnest operators of
     * {@code subTree} and against the data source variables of
     * {@code subTree}. For each match the field name, field type and matching
     * subtree are recorded in the function expression, and candidate indexes
     * are registered through {@link #fillIndexExprs}.
     *
     * @param subTree
     *            the operator subtree to match against
     * @param analysisCtx
     *            the analysis context of one access method; updated in place
     * @param context
     *            the optimization context, used to obtain type environments
     * @throws AlgebricksException
     *             if a metadata or type lookup fails
     */
    protected void fillAllIndexExprs(OptimizableOperatorSubTree subTree, AccessMethodAnalysisContext analysisCtx,
            IOptimizationContext context) throws AlgebricksException {
        // Position of the current expression in analysisCtx.matchedFuncExprs.
        int optFuncExprIndex = 0;
        // Stays empty for a collection scan; otherwise the indexes of the
        // subtree's dataset are fetched from the metadata provider.
        List<Index> datasetIndexes = new ArrayList<Index>();
        if (subTree.dataSourceType != DataSourceType.COLLECTION_SCAN)
            datasetIndexes = metadataProvider.getDatasetIndexes(subTree.dataset.getDataverseName(),
                    subTree.dataset.getDatasetName());
        for (IOptimizableFuncExpr optFuncExpr : analysisCtx.matchedFuncExprs) {
            // Try to match variables from optFuncExpr to assigns or unnests.
            for (int assignOrUnnestIndex = 0; assignOrUnnestIndex < subTree.assignsAndUnnests
                    .size(); assignOrUnnestIndex++) {
                AbstractLogicalOperator op = subTree.assignsAndUnnests.get(assignOrUnnestIndex);
                if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
                    AssignOperator assignOp = (AssignOperator) op;
                    List<LogicalVariable> varList = assignOp.getVariables();
                    for (int varIndex = 0; varIndex < varList.size(); varIndex++) {
                        LogicalVariable var = varList.get(varIndex);
                        int optVarIndex = optFuncExpr.findLogicalVar(var);
                        // No matching var in optFuncExpr.
                        if (optVarIndex == -1) {
                            continue;
                        }
                        // At this point we have matched the optimizable func
                        // expr at optFuncExprIndex to an assigned variable.
                        // Remember matching subtree.
                        optFuncExpr.setOptimizableSubTree(optVarIndex, subTree);
                        List<String> fieldName = getFieldNameFromSubTree(optFuncExpr, subTree, assignOrUnnestIndex,
                                varIndex, subTree.recordType, optVarIndex,
                                optFuncExpr.getFuncExpr().getArguments().get(optVarIndex).getValue());
                        // The subtree recorded above is kept even when no
                        // field name can be derived.
                        if (fieldName == null) {
                            continue;
                        }
                        IAType fieldType = (IAType) context.getOutputTypeEnvironment(assignOp)
                                .getType(optFuncExpr.getLogicalExpr(optVarIndex));
                        // Set the fieldName in the corresponding matched
                        // function expression.
                        optFuncExpr.setFieldName(optVarIndex, fieldName);
                        optFuncExpr.setFieldType(optVarIndex, fieldType);

                        // setTypeTag() calls setFieldType() again, so the type
                        // stored just above is overwritten; the local fieldType
                        // is still what gets passed to fillIndexExprs() below.
                        setTypeTag(context, subTree, optFuncExpr, optVarIndex);
                        if (subTree.hasDataSource()) {
                            fillIndexExprs(datasetIndexes, fieldName, fieldType, optFuncExpr, optFuncExprIndex,
                                    optVarIndex, subTree, analysisCtx);
                        }
                    }
                } else {
                    // Every operator that is not an assign is cast to an unnest
                    // here - assumes assignsAndUnnests holds nothing else;
                    // TODO confirm.
                    UnnestOperator unnestOp = (UnnestOperator) op;
                    LogicalVariable var = unnestOp.getVariable();
                    int funcVarIndex = optFuncExpr.findLogicalVar(var);
                    // No matching var in optFuncExpr.
                    if (funcVarIndex == -1) {
                        continue;
                    }
                    // At this point we have matched the optimizable func expr
                    // at optFuncExprIndex to an unnest variable.
                    // Remember matching subtree.
                    optFuncExpr.setOptimizableSubTree(funcVarIndex, subTree);
                    List<String> fieldName = null;
                    if (subTree.dataSourceType == DataSourceType.COLLECTION_SCAN) {
                        // For a collection scan no field name is derived
                        // (fieldName stays null); the expression is set to a
                        // reference to the unnest variable itself.
                        optFuncExpr.setLogicalExpr(funcVarIndex, new VariableReferenceExpression(var));
                    } else {
                        fieldName = getFieldNameFromSubTree(optFuncExpr, subTree, assignOrUnnestIndex, 0,
                                subTree.recordType, funcVarIndex,
                                optFuncExpr.getFuncExpr().getArguments().get(funcVarIndex).getValue());
                        if (fieldName == null) {
                            continue;
                        }
                    }
                    IAType fieldType = (IAType) context.getOutputTypeEnvironment(unnestOp)
                            .getType(optFuncExpr.getLogicalExpr(funcVarIndex));
                    // Set the fieldName in the corresponding matched function
                    // expression.
                    optFuncExpr.setFieldName(funcVarIndex, fieldName);
                    optFuncExpr.setFieldType(funcVarIndex, fieldType);

                    // As in the assign branch, setTypeTag() overwrites the
                    // field type stored just above.
                    setTypeTag(context, subTree, optFuncExpr, funcVarIndex);
                    if (subTree.hasDataSource()) {
                        fillIndexExprs(datasetIndexes, fieldName, fieldType, optFuncExpr, optFuncExprIndex,
                                funcVarIndex, subTree, analysisCtx);
                    }
                }
            }

            // Try to match variables from optFuncExpr to datasourcescan if not
            // already matched in assigns.
            List<LogicalVariable> dsVarList = subTree.getDataSourceVariables();
            for (int varIndex = 0; varIndex < dsVarList.size(); varIndex++) {
                LogicalVariable var = dsVarList.get(varIndex);
                int funcVarIndex = optFuncExpr.findLogicalVar(var);
                // No matching var in optFuncExpr.
                if (funcVarIndex == -1) {
                    continue;
                }
                // The variable value is one of the partitioning fields.
                List<String> fieldName = DatasetUtils.getPartitioningKeys(subTree.dataset).get(varIndex);
                IAType fieldType = (IAType) context.getOutputTypeEnvironment(subTree.dataSourceRef.getValue())
                        .getVarType(var);
                // Set the fieldName in the corresponding matched function
                // expression, and remember matching subtree.
                optFuncExpr.setFieldName(funcVarIndex, fieldName);
                optFuncExpr.setOptimizableSubTree(funcVarIndex, subTree);
                optFuncExpr.setSourceVar(funcVarIndex, var);
                optFuncExpr.setLogicalExpr(funcVarIndex, new VariableReferenceExpression(var));
                setTypeTag(context, subTree, optFuncExpr, funcVarIndex);
                // This path checks hasDataSourceScan(), whereas the assign and
                // unnest paths above check hasDataSource().
                if (subTree.hasDataSourceScan()) {
                    fillIndexExprs(datasetIndexes, fieldName, fieldType, optFuncExpr, optFuncExprIndex,
                            funcVarIndex, subTree, analysisCtx);
                }
            }
            optFuncExprIndex++;
        }
    }

    /**
     * Stores, as the field type at {@code funcVarIndex} of the function
     * expression, the type that the output type environment of the subtree's
     * root operator reports for the logical variable at that position. The
     * type is stored unconditionally; no null check is made.
     *
     * @param context
     *            the optimization context providing the type environment
     * @param subTree
     *            the subtree whose root operator's output types are consulted
     * @param optFuncExpr
     *            the function expression to update
     * @param funcVarIndex
     *            position of the variable inside {@code optFuncExpr}
     * @throws AlgebricksException
     *             if the type environment or the variable type cannot be
     *             obtained
     */
    private void setTypeTag(IOptimizationContext context, OptimizableOperatorSubTree subTree,
            IOptimizableFuncExpr optFuncExpr, int funcVarIndex) throws AlgebricksException {
        IVariableTypeEnvironment rootTypeEnv = context.getOutputTypeEnvironment(subTree.root);
        LogicalVariable targetVar = optFuncExpr.getLogicalVar(funcVarIndex);
        optFuncExpr.setFieldType(funcVarIndex, (IAType) rootTypeEnv.getVarType(targetVar));
    }

    /**
     * Returns the field name (as a list of path components) accessed by the
     * expression found at {@code opIndex} in {@code subTree.assignsAndUnnests}.
     * For an assign operator the expression at {@code assignVarIndex} is
     * inspected; for an unnest operator the first argument of its
     * scan-collection call is inspected.
     * <p>
     * If the inspected expression is a field access (by name, by index, or
     * nested), the operators after {@code opIndex} are searched for one that
     * produces a variable used by that expression; when one is found, this
     * method recurses on it and appends the current field name(s) to the
     * result. If the inspected expression is instead one of the allowed
     * wrapping functions (word-tokens, gram-tokens, substring variants, spatial
     * constructors) over a variable, the search continues at the operator that
     * produces that variable.
     * <p>
     * Side effects on {@code optFuncExpr}: the logical expression and source
     * variable at {@code funcVarIndex} may be set, and the partial-field flag
     * is set when the optimizable function is edit-distance-check and a
     * wrapping function was encountered.
     *
     * @param optFuncExpr
     *            optimizable function expression being annotated
     * @param subTree
     *            subtree whose {@code assignsAndUnnests} list is searched
     * @param opIndex
     *            index of the operator to inspect in {@code assignsAndUnnests}
     * @param assignVarIndex
     *            index of the expression/variable inside that operator
     * @param recordType
     *            record type used to translate field indexes into field names
     * @param funcVarIndex
     *            index of the variable inside {@code optFuncExpr}
     * @param parentFuncExpr
     *            expression recorded on {@code optFuncExpr} when a field access
     *            is found
     * @return the field name components, or null if the expression does not
     *         lead to a field access after following the allowed functions
     * @throws AlgebricksException
     *             if the type of a nested field cannot be resolved
     */
    protected List<String> getFieldNameFromSubTree(IOptimizableFuncExpr optFuncExpr,
            OptimizableOperatorSubTree subTree, int opIndex, int assignVarIndex, ARecordType recordType,
            int funcVarIndex, ILogicalExpression parentFuncExpr) throws AlgebricksException {
        // Get expression corresponding to opVar at varIndex.
        AbstractLogicalExpression expr = null;
        AbstractFunctionCallExpression childFuncExpr = null;
        AbstractLogicalOperator op = subTree.assignsAndUnnests.get(opIndex);
        if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
            AssignOperator assignOp = (AssignOperator) op;
            expr = (AbstractLogicalExpression) assignOp.getExpressions().get(assignVarIndex).getValue();
            // A constant or variable reference cannot yield a field name; check
            // the tag before casting so we return null instead of failing the cast.
            if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
                return null;
            }
            childFuncExpr = (AbstractFunctionCallExpression) expr;
        } else {
            UnnestOperator unnestOp = (UnnestOperator) op;
            expr = (AbstractLogicalExpression) unnestOp.getExpressionRef().getValue();
            if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
                return null;
            }
            childFuncExpr = (AbstractFunctionCallExpression) expr;
            if (childFuncExpr.getFunctionIdentifier() != AsterixBuiltinFunctions.SCAN_COLLECTION) {
                return null;
            }
            // Look through scan-collection at the collection expression itself.
            expr = (AbstractLogicalExpression) childFuncExpr.getArguments().get(0).getValue();
        }
        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
            return null;
        }
        AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
        FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();

        boolean isByName = false;
        boolean isFieldAccess = false;
        String fieldName = null;
        List<String> nestedAccessFieldName = null;
        int fieldIndex = -1;
        if (funcIdent == AsterixBuiltinFunctions.FIELD_ACCESS_BY_NAME) {
            ILogicalExpression nameArg = funcExpr.getArguments().get(1).getValue();
            if (nameArg.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
                return null;
            }
            ConstantExpression constExpr = (ConstantExpression) nameArg;
            fieldName = ((AString) ((AsterixConstantValue) constExpr.getValue()).getObject()).getStringValue();
            isFieldAccess = true;
            isByName = true;
        } else if (funcIdent == AsterixBuiltinFunctions.FIELD_ACCESS_BY_INDEX) {
            ILogicalExpression idxArg = funcExpr.getArguments().get(1).getValue();
            if (idxArg.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
                return null;
            }
            ConstantExpression constExpr = (ConstantExpression) idxArg;
            fieldIndex = ((AInt32) ((AsterixConstantValue) constExpr.getValue()).getObject()).getIntegerValue();
            isFieldAccess = true;
        } else if (funcIdent == AsterixBuiltinFunctions.FIELD_ACCESS_NESTED) {
            ILogicalExpression nameArg = funcExpr.getArguments().get(1).getValue();
            if (nameArg.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
                return null;
            }
            ConstantExpression constExpr = (ConstantExpression) nameArg;
            AOrderedList orderedNestedFieldName = (AOrderedList) ((AsterixConstantValue) constExpr.getValue())
                    .getObject();
            nestedAccessFieldName = new ArrayList<String>();
            for (int i = 0; i < orderedNestedFieldName.size(); i++) {
                nestedAccessFieldName.add(((AString) orderedNestedFieldName.getItem(i)).getStringValue());
            }
            isFieldAccess = true;
            isByName = true;
        }
        if (isFieldAccess) {
            optFuncExpr.setLogicalExpr(funcVarIndex, parentFuncExpr);

            // Variables produced by the inspected operator. op may be an unnest
            // here, so it must not be cast to AssignOperator unconditionally.
            List<LogicalVariable> producedVars;
            if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
                producedVars = ((AssignOperator) op).getVariables();
            } else {
                producedVars = ((UnnestOperator) op).getVariables();
            }

            // Variables the field access reads; these do not change while searching.
            List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
            expr.getUsedVariables(usedVars);

            // Go forward through the following assigns/unnests looking for one
            // that produces a variable used by the field access. The scan does
            // not stop at the first match, so the last matching operator wins.
            int matchedOpIndex = -1;
            int matchedVarIndex = -1;
            for (int i = opIndex + 1; i < subTree.assignsAndUnnests.size(); i++) {
                AbstractLogicalOperator subOp = subTree.assignsAndUnnests.get(i);
                List<LogicalVariable> varList;

                if (subOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
                    varList = ((AssignOperator) subOp).getVariables();
                } else if (subOp.getOperatorTag() == LogicalOperatorTag.UNNEST) {
                    varList = ((UnnestOperator) subOp).getVariables();
                } else {
                    break;
                }

                for (int varIndex = 0; varIndex < varList.size(); varIndex++) {
                    if (usedVars.contains(varList.get(varIndex))) {
                        matchedOpIndex = i;
                        matchedVarIndex = varIndex;
                    }
                }
            }
            if (matchedOpIndex > -1) {
                // The field access is applied to the result of another operator:
                // resolve that operator's field name first.
                List<String> parentFieldNames = getFieldNameFromSubTree(optFuncExpr, subTree, matchedOpIndex,
                        matchedVarIndex, recordType, funcVarIndex, parentFuncExpr);

                if (parentFieldNames == null) {
                    // Nested operator was not a field access; we will not use an index.
                    return null;
                }

                if (!isByName) {
                    // Access by index: translate the index into a name using the
                    // type of the enclosing (parent) field.
                    try {
                        fieldName = ((ARecordType) recordType.getSubFieldType(parentFieldNames))
                                .getFieldNames()[fieldIndex];
                    } catch (IOException e) {
                        throw new AlgebricksException(e);
                    }
                }
                // Set after the recursion so this level's variable overrides the
                // one recorded by the nested call.
                optFuncExpr.setSourceVar(funcVarIndex, producedVars.get(assignVarIndex));
                // Append this level's name(s) to the parent path and return.
                if (nestedAccessFieldName != null) {
                    parentFieldNames.addAll(nestedAccessFieldName);
                } else {
                    parentFieldNames.add(fieldName);
                }
                return parentFieldNames;
            }

            // No nested operator: we are at the lowest level.
            optFuncExpr.setSourceVar(funcVarIndex, producedVars.get(assignVarIndex));
            if (isByName) {
                if (nestedAccessFieldName != null) {
                    return nestedAccessFieldName;
                }
                return new ArrayList<String>(Arrays.asList(fieldName));
            }
            return new ArrayList<String>(Arrays.asList(recordType.getFieldNames()[fieldIndex]));
        }

        // Not a field access: only the functions below may wrap a field and
        // still be followed further down the subtree.
        if (funcIdent != AsterixBuiltinFunctions.WORD_TOKENS && funcIdent != AsterixBuiltinFunctions.GRAM_TOKENS
                && funcIdent != AsterixBuiltinFunctions.SUBSTRING
                && funcIdent != AsterixBuiltinFunctions.SUBSTRING_BEFORE
                && funcIdent != AsterixBuiltinFunctions.SUBSTRING_AFTER
                && funcIdent != AsterixBuiltinFunctions.CREATE_POLYGON
                && funcIdent != AsterixBuiltinFunctions.CREATE_MBR
                && funcIdent != AsterixBuiltinFunctions.CREATE_RECTANGLE
                && funcIdent != AsterixBuiltinFunctions.CREATE_CIRCLE
                && funcIdent != AsterixBuiltinFunctions.CREATE_LINE
                && funcIdent != AsterixBuiltinFunctions.CREATE_POINT) {
            return null;
        }
        // We use a part of the field in edit distance computation
        if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK) {
            optFuncExpr.setPartialField(true);
        }
        // We expect the function's argument to be a variable, otherwise we
        // cannot apply an index.
        ILogicalExpression argExpr = funcExpr.getArguments().get(0).getValue();
        if (argExpr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            return null;
        }
        LogicalVariable curVar = ((VariableReferenceExpression) argExpr).getVariableReference();
        // We look for the assign or unnest operator that produces curVar below
        // the current operator
        for (int assignOrUnnestIndex = opIndex + 1; assignOrUnnestIndex < subTree.assignsAndUnnests
                .size(); assignOrUnnestIndex++) {
            AbstractLogicalOperator curOp = subTree.assignsAndUnnests.get(assignOrUnnestIndex);
            if (curOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
                AssignOperator assignOp = (AssignOperator) curOp;
                List<LogicalVariable> varList = assignOp.getVariables();
                for (int varIndex = 0; varIndex < varList.size(); varIndex++) {
                    LogicalVariable var = varList.get(varIndex);
                    if (var.equals(curVar)) {
                        optFuncExpr.setSourceVar(funcVarIndex, var);
                        return getFieldNameFromSubTree(optFuncExpr, subTree, assignOrUnnestIndex, varIndex,
                                recordType, funcVarIndex, childFuncExpr);
                    }
                }
            } else {
                UnnestOperator unnestOp = (UnnestOperator) curOp;
                LogicalVariable var = unnestOp.getVariable();
                if (var.equals(curVar)) {
                    // NOTE(review): the result of this recursive call is discarded,
                    // unlike the ASSIGN branch above which returns it; only its
                    // side effects on optFuncExpr are kept and the method goes on
                    // to return null. Looks like a missing return - confirm the
                    // intent before changing, since it affects index selection.
                    getFieldNameFromSubTree(optFuncExpr, subTree, assignOrUnnestIndex, 0, recordType, funcVarIndex,
                            childFuncExpr);
                }
            }
        }
        return null;
    }
}