org.apache.pig.impl.logicalLayer.optimizer.OpLimitOptimizer.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pig.impl.logicalLayer.optimizer.OpLimitOptimizer.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pig.impl.logicalLayer.optimizer;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.ExecType;
import org.apache.pig.PigException;
import org.apache.pig.impl.logicalLayer.LOCogroup;
import org.apache.pig.impl.logicalLayer.LOCross;
import org.apache.pig.impl.logicalLayer.LODistinct;
import org.apache.pig.impl.logicalLayer.LOFilter;
import org.apache.pig.impl.logicalLayer.LOForEach;
import org.apache.pig.impl.logicalLayer.LOLimit;
import org.apache.pig.impl.logicalLayer.LOLoad;
import org.apache.pig.impl.logicalLayer.LOSort;
import org.apache.pig.impl.logicalLayer.LOSplit;
import org.apache.pig.impl.logicalLayer.LOSplitOutput;
import org.apache.pig.impl.logicalLayer.LOUnion;
import org.apache.pig.impl.logicalLayer.LOJoin;
import org.apache.pig.impl.logicalLayer.LOJoin;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.LogicalPlanCloner;
import org.apache.pig.impl.plan.DepthFirstWalker;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.optimizer.OptimizerException;

/**
 * A logical plan transformer that tries to move {@link LOLimit} operators
 * towards the inputs of the plan so that fewer records flow through the
 * operators that follow.
 *
 * <p>Depending on the operator directly in front of the limit, the limit is:
 * <ul>
 *   <li>left where it is (cogroup, filter, load, split, distinct, join, or a
 *       foreach that flattens);</li>
 *   <li>moved in front of a foreach that does not flatten;</li>
 *   <li>duplicated in front of every input of a cross or union, keeping the
 *       original limit in place;</li>
 *   <li>merged into a preceding sort or limit (the sort case is skipped in
 *       {@link ExecType#LOCAL} mode);</li>
 *   <li>replaced by a limited copy of the sort when it follows
 *       sort -&gt; split -&gt; split output (skipped in
 *       {@link ExecType#LOCAL} mode).</li>
 * </ul>
 */
public class OpLimitOptimizer extends LogicalTransformer {

    private static final Log log = LogFactory.getLog(OpLimitOptimizer.class);

    /** Execution mode; sort-related rewrites are skipped for {@link ExecType#LOCAL}. */
    private final ExecType mode;

    /**
     * Creates an optimizer for the given plan using {@link ExecType#MAPREDUCE}.
     *
     * @param plan the logical plan to optimize
     */
    public OpLimitOptimizer(LogicalPlan plan) {
        this(plan, ExecType.MAPREDUCE);
    }

    /**
     * Creates an optimizer for the given plan and execution mode.
     *
     * @param plan the logical plan to optimize
     * @param mode the execution mode the plan will run in
     */
    public OpLimitOptimizer(LogicalPlan plan, ExecType mode) {
        super(plan);
        this.mode = mode;
    }

    /**
     * Decides whether the limit in {@code nodes} is a candidate for
     * {@link #transform(List)}.
     *
     * @param nodes the matched operators; the first element must be a {@link LOLimit}
     * @return {@code false} if the operator in front of the limit is one the
     *         limit cannot be moved past (cogroup, filter, load, split,
     *         distinct, join, or a foreach with a flatten); {@code true} otherwise
     * @throws OptimizerException with code 2052 for a null/empty list, 2005 if
     *         the first element is not a limit, 2008 if the limit does not have
     *         exactly one input, or 2049 for any other failure
     */
    @Override
    public boolean check(List<LogicalOperator> nodes) throws OptimizerException {
        LOLimit limit = requireLimit(nodes);
        try {
            LogicalOperator predecessor = singlePredecessor(limit);
            if (blocksLimit(predecessor)) {
                return false;
            }
            // Limit cannot be pushed in front of ForEach if it has a flatten
            if (predecessor instanceof LOForEach && hasFlatten((LOForEach) predecessor)) {
                return false;
            }
            return true;
        } catch (OptimizerException oe) {
            // Keep the specific error code instead of masking it with 2049.
            throw oe;
        } catch (Exception e) {
            int errCode = 2049;
            String msg = "Error while performing checks to optimize limit operator.";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }

    /**
     * Optimizes the limit in {@code nodes} by delegating to
     * {@link #processNode(LOLimit)}.
     *
     * @param nodes the matched operators; the first element must be a {@link LOLimit}
     * @throws OptimizerException with code 2052 for a null/empty list, 2005 if
     *         the first element is not a limit, any error raised by
     *         {@link #processNode(LOLimit)}, or 2050 for any other failure
     */
    @Override
    public void transform(List<LogicalOperator> nodes) throws OptimizerException {
        LOLimit limit = requireLimit(nodes);
        try {
            processNode(limit);
        } catch (OptimizerException oe) {
            throw oe;
        } catch (Exception e) {
            int errCode = 2050;
            String msg = "Internal error. Unable to optimize limit operator.";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }

    /**
     * Recursively optimizes a limit until one of the following happens:
     * <ol>
     *   <li>the limit cannot move up any further;</li>
     *   <li>the limit has been merged into a sort or another limit.</li>
     * </ol>
     * When a limit is duplicated (cross/union), the original limit stays where
     * it is and each new copy is optimized recursively.
     *
     * @param limit the limit operator to optimize
     * @throws OptimizerException with code 2008 if the limit does not have
     *         exactly one input, 2009-2012 if a plan rewrite fails, or 2013 if
     *         the operator in front of the limit is not handled
     */
    public void processNode(LOLimit limit) throws OptimizerException {
        LogicalOperator predecessor = singlePredecessor(limit);

        // Limit cannot be pushed up
        if (blocksLimit(predecessor)) {
            return;
        }

        if (predecessor instanceof LOForEach) {
            pushAboveForEach(limit, (LOForEach) predecessor);
        } else if (predecessor instanceof LOCross || predecessor instanceof LOUnion) {
            duplicateAboveInputs(limit, predecessor);
        } else if (predecessor instanceof LOSort) {
            mergeIntoSort(limit, (LOSort) predecessor);
        } else if (predecessor instanceof LOLimit) {
            mergeIntoLimit(limit, (LOLimit) predecessor);
        } else if (predecessor instanceof LOSplitOutput) {
            replaceAfterSplit(limit, predecessor);
        } else {
            int errCode = 2013;
            String msg = "Moving LOLimit in front of " + predecessor.getClass().getSimpleName()
                    + " is not implemented";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
    }

    /** Validates the matched node list and returns its first element as a limit. */
    private LOLimit requireLimit(List<LogicalOperator> nodes) throws OptimizerException {
        if ((nodes == null) || nodes.isEmpty()) {
            int errCode = 2052;
            String msg = "Internal error. Cannot retrieve operator from null or empty list.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
        LogicalOperator lo = nodes.get(0);
        if (!(lo instanceof LOLimit)) {
            int errCode = 2005;
            String msg = "Expected " + LOLimit.class.getSimpleName() + ", got "
                    + (lo == null ? lo : lo.getClass().getSimpleName());
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
        return (LOLimit) lo;
    }

    /** Returns the only input of the limit, or fails with error 2008. */
    private LogicalOperator singlePredecessor(LogicalOperator limit) throws OptimizerException {
        List<LogicalOperator> predecessors = mPlan.getPredecessors(limit);
        // A missing predecessor list is reported as zero inputs rather than as an NPE.
        int inputs = (predecessors == null) ? 0 : predecessors.size();
        if (inputs != 1) {
            int errCode = 2008;
            String msg = "Limit cannot have more than one input. Found " + inputs + " inputs.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
        return predecessors.get(0);
    }

    /** Tells whether the limit must stay behind the given operator. */
    private static boolean blocksLimit(LogicalOperator predecessor) {
        return predecessor instanceof LOCogroup || predecessor instanceof LOFilter
                || predecessor instanceof LOLoad || predecessor instanceof LOSplit
                || predecessor instanceof LODistinct || predecessor instanceof LOJoin;
    }

    /** Tells whether any flatten flag of the foreach is set. */
    private static boolean hasFlatten(LOForEach loForEach) {
        return loForEach.getFlatten().contains(Boolean.TRUE);
    }

    /** Moves the limit in front of a foreach without flatten, then keeps optimizing it. */
    private void pushAboveForEach(LOLimit limit, LOForEach loForEach) throws OptimizerException {
        if (hasFlatten(loForEach)) {
            return;
        }

        // Get operator before LOForEach
        List<LogicalOperator> forEachInputs = mPlan.getPredecessors(loForEach);
        LogicalOperator prepredecessor =
                (forEachInputs == null || forEachInputs.isEmpty()) ? null : forEachInputs.get(0);
        if (prepredecessor == null) {
            int errCode = 2010;
            String msg = "LOForEach should have one input";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }

        // We can safely move LOLimit up
        try {
            mPlan.removeAndReconnect(limit);
            insertBetween(prepredecessor, limit, loForEach, null);
        } catch (Exception e) {
            int errCode = 2009;
            String msg = "Can not move LOLimit up";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }

        // we can move LOLimit even further, recursively optimize LOLimit
        processNode(limit);
    }

    /**
     * Puts a copy of the limit in front of every input of a cross or union
     * (X-&gt;limit becomes limit-&gt;X-&gt;limit) and optimizes each copy.
     */
    private void duplicateAboveInputs(LOLimit limit, LogicalOperator predecessor)
            throws OptimizerException {
        // Work on a snapshot of the inputs: inserting the copies rewires the plan.
        List<LogicalOperator> nodesToProcess =
                new ArrayList<LogicalOperator>(mPlan.getPredecessors(predecessor));
        for (LogicalOperator prepredecessor : nodesToProcess) {
            LOLimit newLimit;
            try {
                newLimit = limit.duplicate();
                insertBetween(prepredecessor, newLimit, predecessor, null);
            } catch (Exception e) {
                int errCode = 2011;
                String msg = "Can not insert LOLimit clone";
                throw new OptimizerException(msg, errCode, PigException.BUG, e);
            }
            // we can move the new LOLimit even further, recursively optimize LOLimit
            processNode(newLimit);
        }
    }

    /** Folds the limit into the preceding sort, producing a "limited sort". */
    private void mergeIntoSort(LOLimit limit, LOSort sort) throws OptimizerException {
        if (mode == ExecType.LOCAL) {
            // We don't need this optimisation to happen in the local mode,
            // so we do nothing here.
            return;
        }
        // -1 is treated as "no limit set on the sort yet"; otherwise keep the smaller limit.
        if (sort.getLimit() == -1) {
            sort.setLimit(limit.getLimit());
        } else {
            sort.setLimit(Math.min(sort.getLimit(), limit.getLimit()));
        }
        try {
            mPlan.removeAndReconnect(limit);
        } catch (Exception e) {
            int errCode = 2012;
            String msg = "Can not remove LOLimit after LOSort";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }

    /** Folds the limit into the preceding limit, keeping the smaller of the two. */
    private void mergeIntoLimit(LOLimit limit, LOLimit beforeLimit) throws OptimizerException {
        beforeLimit.setLimit(Math.min(beforeLimit.getLimit(), limit.getLimit()));
        try {
            mPlan.removeAndReconnect(limit);
        } catch (Exception e) {
            int errCode = 2012;
            String msg = "Can not remove LOLimit after LOLimit";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }

    /**
     * Handles a limit separated from an order-by (LOSort) by a split: when the
     * chain in front of the limit is sort -&gt; split -&gt; split output, the
     * limit is replaced by a new sort built from the original sort's settings
     * and carrying the limit. Any other chain is left untouched.
     */
    private void replaceAfterSplit(LOLimit limit, LogicalOperator splitOutput)
            throws OptimizerException {
        if (mode == ExecType.LOCAL) {
            // We don't need this optimisation to happen in the local mode,
            // so we do nothing here.
            return;
        }

        // After insertion of splitters, any node in the plan can
        // have at most one predecessor
        List<LogicalOperator> grandparents = mPlan.getPredecessors(splitOutput);
        if (grandparents == null || grandparents.isEmpty()
                || !(grandparents.get(0) instanceof LOSplit)) {
            return;
        }
        List<LogicalOperator> greatGrandparents = mPlan.getPredecessors(grandparents.get(0));
        if (greatGrandparents == null || greatGrandparents.isEmpty()
                || !(greatGrandparents.get(0) instanceof LOSort)) {
            return;
        }

        LOSort sort = (LOSort) greatGrandparents.get(0);
        String scope = sort.getOperatorKey().scope;
        OperatorKey newKey =
                new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope));
        LOSort newSort = new LOSort(sort.getPlan(), newKey, sort.getSortColPlans(),
                sort.getAscendingCols(), sort.getUserFunc());
        newSort.setLimit(limit.getLimit());
        try {
            mPlan.replace(limit, newSort);
        } catch (PlanException e) {
            int errCode = 2012;
            String msg = "Can not replace LOLimit with LOSort after splitter";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }
}