org.apache.hadoop.mapred.split.TezMapredSplitsGrouper.java Source code

Introduction

Here is the source code for org.apache.hadoop.mapred.split.TezMapredSplitsGrouper.java
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred.split;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapreduce.split.TezMapReduceSplitsGrouper;
import org.apache.hadoop.yarn.util.RackResolver;
import org.apache.tez.dag.api.TezUncheckedException;

import com.google.common.base.Preconditions;

/**
 * A Helper that provides grouping logic to group InputSplits
 * using various parameters. A {@link TezGroupedSplit} is used
 * to wrap the real InputSplits in a group.
 */
@Public
@Evolving
public class TezMapredSplitsGrouper {
    private static final Log LOG = LogFactory.getLog(TezMapredSplitsGrouper.class);

    /**
     * Pairs an original {@link InputSplit} with a flag that records whether the
     * split has already been assigned to a group. The same holder instance may be
     * referenced from several location lists, so the flag is what prevents a split
     * from being grouped twice.
     */
    class SplitHolder {
        /** Set to {@code true} once the split has been added to a group. */
        boolean isProcessed;
        /** The wrapped original split. */
        InputSplit split;

        SplitHolder(InputSplit split) {
            this.split = split;
        }
    }

    /**
     * The list of splits available at one location, together with a cursor
     * ({@code headIndex}) pointing at the next candidate split in that list.
     */
    class LocationHolder {
        /** Index of the next split to consider; equals {@code splits.size()} when exhausted. */
        int headIndex = 0;
        /** Splits that list this location, in insertion order. */
        List<SplitHolder> splits;

        LocationHolder(int capacity) {
            this.splits = new ArrayList<SplitHolder>(capacity);
        }

        /** @return {@code true} when the cursor has reached the end of the list */
        boolean isEmpty() {
            return headIndex == splits.size();
        }

        /**
         * Advances the cursor past splits that are already processed and returns
         * the first unprocessed one without consuming it.
         *
         * @return the split at the cursor, or {@code null} if none remain
         */
        SplitHolder getUnprocessedHeadSplit() {
            for (; !isEmpty(); incrementHeadIndex()) {
                SplitHolder candidate = splits.get(headIndex);
                if (!candidate.isProcessed) {
                    return candidate;
                }
            }
            return null;
        }

        /** Moves the cursor forward by one entry. */
        void incrementHeadIndex() {
            headIndex += 1;
        }
    }

    /**
     * Creates the empty location-to-holder map used during grouping.
     *
     * @param conf configuration consulted for the "repeatable grouping" flag
     * @return a {@link TreeMap} (keys iterate in sorted order) when the flag is
     *         enabled, otherwise a {@link HashMap}
     */
    Map<String, LocationHolder> createLocationsMap(Configuration conf) {
        boolean repeatable = conf.getBoolean(TezMapReduceSplitsGrouper.TEZ_GROUPING_REPEATABLE,
                TezMapReduceSplitsGrouper.TEZ_GROUPING_REPEATABLE_DEFAULT);
        if (!repeatable) {
            return new HashMap<String, LocationHolder>();
        }
        // sorted keys give the grouping loop a stable iteration order
        return new TreeMap<String, LocationHolder>();
    }

    /**
     * Combines the given splits into {@link TezGroupedSplit}s, aiming for
     * {@code desiredNumSplits} groups and preferring to combine splits that share
     * a location.
     *
     * <p>Outline:
     * <ol>
     * <li>A positive {@code TEZ_GROUPING_SPLIT_COUNT} in {@code conf} overrides
     *     {@code desiredNumSplits}.</li>
     * <li>Otherwise, when there are splits, the desired count is adjusted so the
     *     average group length stays within the configured min/max group size.</li>
     * <li>If the desired count is not positive, there are no splits, or the desired
     *     count is not smaller than the number of splits, every split is wrapped in
     *     its own group.</li>
     * <li>Otherwise splits are bucketed by location and the locations are swept
     *     repeatedly, cutting groups bounded by length and/or count. When a sweep
     *     creates no group, the remaining splits are re-bucketed by rack; when a
     *     sweep creates few groups, undersized groups are allowed.</li>
     * </ol>
     *
     * @param conf configuration holding the grouping parameters
     * @param originalSplits the splits to group; may be {@code null}
     * @param desiredNumSplits requested number of groups; values {@code <= 0} mean
     *        "not specified"
     * @param wrappedInputFormatName name passed through to each created
     *        {@link TezGroupedSplit}
     * @return the grouped splits, or {@code null} if {@code originalSplits} is
     *         {@code null}
     * @throws IOException declared by this method; presumably propagated from the
     *         split accessors (length/locations) - confirm against InputSplit
     * @throws TezUncheckedException if the min/max group sizes are invalid, if
     *         neither grouping-by-length nor grouping-by-count is enabled, or if the
     *         number of remaining splits found when switching to rack-local
     *         grouping does not match the expected count
     * @throws IllegalStateException if a split would be placed in two groups
     */
    public InputSplit[] getGroupedSplits(Configuration conf, InputSplit[] originalSplits, int desiredNumSplits,
            String wrappedInputFormatName) throws IOException {
        LOG.info("Grouping splits in Tez");

        int configNumSplits = conf.getInt(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_COUNT, 0);
        if (configNumSplits > 0) {
            // always use config override if specified
            desiredNumSplits = configNumSplits;
            LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
        }

        if (!(configNumSplits > 0 || originalSplits == null || originalSplits.length == 0)) {
            // numSplits has not been overridden by config
            // numSplits has been set at runtime
            // there are splits generated
            // Do sanity checks
            long totalLength = 0;
            for (InputSplit split : originalSplits) {
                totalLength += split.getLength();
            }

            int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.length;
            long lengthPerGroup = totalLength / splitCount;

            long maxLengthPerGroup = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE,
                    TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
            long minLengthPerGroup = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE,
                    TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
            if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
                throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
                        + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
            }
            if (lengthPerGroup > maxLengthPerGroup) {
                // splits too big to work. Need to override with max size.
                // Computed in long and clamped so a huge total length cannot overflow the int cast.
                int newDesiredNumSplits = (int) Math.min(Integer.MAX_VALUE, totalLength / maxLengthPerGroup + 1);
                LOG.info("Desired splits: " + desiredNumSplits + " too small. " + " Desired splitLength: "
                        + lengthPerGroup + " Max splitLength: " + maxLengthPerGroup + " New desired splits: "
                        + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                        + originalSplits.length);

                desiredNumSplits = newDesiredNumSplits;
            } else if (lengthPerGroup < minLengthPerGroup) {
                // splits too small to work. Need to override with min size.
                // Computed in long and clamped so a huge total length cannot overflow the int cast.
                int newDesiredNumSplits = (int) Math.min(Integer.MAX_VALUE, totalLength / minLengthPerGroup + 1);
                LOG.info("Desired splits: " + desiredNumSplits + " too large. " + " Desired splitLength: "
                        + lengthPerGroup + " Min splitLength: " + minLengthPerGroup + " New desired splits: "
                        + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                        + originalSplits.length);

                desiredNumSplits = newDesiredNumSplits;
            }
        }

        if (originalSplits == null) {
            LOG.info("Null original splits");
            return null;
        }

        // A non-positive desired count means "nothing requested"; a negative value must not
        // reach the sizing/division code below.
        if (desiredNumSplits <= 0 || originalSplits.length == 0 || desiredNumSplits >= originalSplits.length) {
            // nothing set. so return all the splits as is
            LOG.info("Using original number of splits: " + originalSplits.length + " desired splits: "
                    + desiredNumSplits);
            InputSplit[] groupedSplits = new TezGroupedSplit[originalSplits.length];
            int i = 0;
            for (InputSplit split : originalSplits) {
                TezGroupedSplit newSplit = new TezGroupedSplit(1, wrappedInputFormatName, split.getLocations());
                newSplit.addSplit(split);
                groupedSplits[i++] = newSplit;
            }
            return groupedSplits;
        }

        // Sentinel key under which splits without any (non-null) location are collected.
        String emptyLocation = "EmptyLocation";
        String[] emptyLocations = { emptyLocation };
        List<InputSplit> groupedSplitsList = new ArrayList<InputSplit>(desiredNumSplits);

        long totalLength = 0;
        Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
        // go through splits and add them to locations
        for (InputSplit split : originalSplits) {
            totalLength += split.getLength();
            String[] locations = split.getLocations();
            if (locations == null || locations.length == 0) {
                locations = emptyLocations;
            }
            for (String location : locations) {
                if (location == null) {
                    location = emptyLocation;
                }
                distinctLocations.put(location, null);
            }
        }

        // desiredNumSplits is in [1, originalSplits.length) here, so these divisions are safe
        // and numSplitsInGroup is at least 1.
        long lengthPerGroup = totalLength / desiredNumSplits;
        int numNodeLocations = distinctLocations.size();
        int numSplitsPerLocation = originalSplits.length / numNodeLocations;
        int numSplitsInGroup = originalSplits.length / desiredNumSplits;

        // allocation loop here so that we have a good initial size for the lists
        for (String location : distinctLocations.keySet()) {
            distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
        }

        // Register each split (via one shared holder) under every distinct location it reports.
        Set<String> locSet = new HashSet<String>();
        for (InputSplit split : originalSplits) {
            locSet.clear();
            SplitHolder splitHolder = new SplitHolder(split);
            String[] locations = split.getLocations();
            if (locations == null || locations.length == 0) {
                locations = emptyLocations;
            }
            for (String location : locations) {
                if (location == null) {
                    location = emptyLocation;
                }
                locSet.add(location);
            }
            for (String location : locSet) {
                LocationHolder holder = distinctLocations.get(location);
                holder.splits.add(splitHolder);
            }
        }

        boolean groupByLength = conf.getBoolean(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH,
                TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
        boolean groupByCount = conf.getBoolean(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT,
                TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
        if (!(groupByLength || groupByCount)) {
            throw new TezUncheckedException("None of the grouping parameters are true: "
                    + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH + ", "
                    + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT);
        }
        LOG.info("Desired numSplits: " + desiredNumSplits + " lengthPerGroup: " + lengthPerGroup + " numLocations: "
                + numNodeLocations + " numSplitsPerLocation: " + numSplitsPerLocation + " numSplitsInGroup: "
                + numSplitsInGroup + " totalLength: " + totalLength + " numOriginalSplits: " + originalSplits.length
                + " . Grouping by length: " + groupByLength + " count: " + groupByCount);

        // go through locations and group splits
        int splitsProcessed = 0;
        List<SplitHolder> group = new ArrayList<SplitHolder>(numSplitsInGroup + 1);
        Set<String> groupLocationSet = new HashSet<String>(10);
        boolean allowSmallGroups = false;
        boolean doingRackLocal = false;
        int iterations = 0;
        while (splitsProcessed < originalSplits.length) {
            iterations++;
            int numFullGroupsCreated = 0;
            for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                group.clear();
                groupLocationSet.clear();
                String location = entry.getKey();
                LocationHolder holder = entry.getValue();
                SplitHolder splitHolder = holder.getUnprocessedHeadSplit();
                if (splitHolder == null) {
                    // all splits on node processed
                    continue;
                }
                // remembered so the cursor can be rewound if the candidate group is rejected
                int oldHeadIndex = holder.headIndex;
                long groupLength = 0;
                int groupNumSplits = 0;
                // Always take at least one split, then keep adding while the enabled
                // length/count limits would not be exceeded by the next split.
                do {
                    group.add(splitHolder);
                    groupLength += splitHolder.split.getLength();
                    groupNumSplits++;
                    holder.incrementHeadIndex();
                    splitHolder = holder.getUnprocessedHeadSplit();
                } while (splitHolder != null
                        && (!groupByLength || (groupLength + splitHolder.split.getLength() <= lengthPerGroup))
                        && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

                if (holder.isEmpty() && !allowSmallGroups && (!groupByLength || groupLength < lengthPerGroup / 2)
                        && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
                    // group too small, reset it
                    holder.headIndex = oldHeadIndex;
                    continue;
                }

                numFullGroupsCreated++;

                // One split group created
                boolean isEmptyLocation = emptyLocation.equals(location);
                String[] groupLocation = { location };
                if (isEmptyLocation) {
                    groupLocation = null;
                } else if (doingRackLocal) {
                    // the map key is a rack here, so report the member splits' own locations instead
                    for (SplitHolder splitH : group) {
                        String[] locations = splitH.split.getLocations();
                        if (locations != null) {
                            for (String loc : locations) {
                                if (loc != null) {
                                    groupLocationSet.add(loc);
                                }
                            }
                        }
                    }
                    // toArray reuses the passed array when the set fits, else allocates a new one
                    groupLocation = groupLocationSet.toArray(groupLocation);
                }
                TezGroupedSplit groupedSplit = new TezGroupedSplit(group.size(), wrappedInputFormatName,
                        groupLocation,
                        // pass rack local hint directly to AM
                        ((doingRackLocal && !isEmptyLocation) ? location : null));
                for (SplitHolder groupedSplitHolder : group) {
                    groupedSplit.addSplit(groupedSplitHolder.split);
                    // template form: the message is only built if the check fails
                    Preconditions.checkState(!groupedSplitHolder.isProcessed,
                            "Duplicates in grouping at location: %s", location);
                    groupedSplitHolder.isProcessed = true;
                    splitsProcessed++;
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength() + " split at: "
                            + location);
                }
                groupedSplitsList.add(groupedSplit);
            }

            if (!doingRackLocal && numFullGroupsCreated < 1) {
                // no node could create a node-local group. go rack-local
                doingRackLocal = true;
                // re-create locations
                int numRemainingSplits = originalSplits.length - splitsProcessed;
                // a set, because the same split may be listed under several locations
                Set<InputSplit> remainingSplits = new HashSet<InputSplit>(numRemainingSplits);
                // gather remaining splits.
                for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                    LocationHolder locHolder = entry.getValue();
                    while (!locHolder.isEmpty()) {
                        SplitHolder splitHolder = locHolder.getUnprocessedHeadSplit();
                        if (splitHolder != null) {
                            remainingSplits.add(splitHolder.split);
                            locHolder.incrementHeadIndex();
                        }
                    }
                }
                if (remainingSplits.size() != numRemainingSplits) {
                    throw new TezUncheckedException(
                            "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
                }

                // doing all this now instead of up front because the number of remaining
                // splits is expected to be much smaller
                RackResolver.init(conf);
                Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
                Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
                for (String location : distinctLocations.keySet()) {
                    // the empty-location sentinel is kept as its own "rack" and never resolved
                    String rack = emptyLocation;
                    if (!emptyLocation.equals(location)) {
                        rack = RackResolver.resolve(location).getNetworkLocation();
                    }
                    locToRackMap.put(location, rack);
                    if (rackLocations.get(rack) == null) {
                        // splits will probably be located in all racks
                        rackLocations.put(rack, new LocationHolder(numRemainingSplits));
                    }
                }
                distinctLocations.clear();
                HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
                int numRackSplitsToGroup = remainingSplits.size();
                for (InputSplit split : originalSplits) {
                    if (numRackSplitsToGroup == 0) {
                        break;
                    }
                    // Iterate through the original splits in their order and consider them for grouping.
                    // This maintains the original ordering in the list and thus subsequent grouping will
                    // maintain that order
                    if (!remainingSplits.contains(split)) {
                        continue;
                    }
                    numRackSplitsToGroup--;
                    rackSet.clear();
                    // fresh holder: the rack pass tracks processed state independently
                    SplitHolder splitHolder = new SplitHolder(split);
                    String[] locations = split.getLocations();
                    if (locations == null || locations.length == 0) {
                        locations = emptyLocations;
                    }
                    for (String location : locations) {
                        if (location == null) {
                            location = emptyLocation;
                        }
                        rackSet.add(locToRackMap.get(location));
                    }
                    for (String rack : rackSet) {
                        rackLocations.get(rack).splits.add(splitHolder);
                    }
                }
                remainingSplits.clear();
                distinctLocations = rackLocations;
                // adjust split length to be smaller because the data is non local
                float rackSplitReduction = conf.getFloat(
                        TezMapReduceSplitsGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
                        TezMapReduceSplitsGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
                if (rackSplitReduction > 0) {
                    long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
                    int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
                    // only apply a reduced limit if it stays positive
                    if (newLengthPerGroup > 0) {
                        lengthPerGroup = newLengthPerGroup;
                    }
                    if (newNumSplitsInGroup > 0) {
                        numSplitsInGroup = newNumSplitsInGroup;
                    }
                }

                LOG.info("Doing rack local after iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                        + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                        + groupedSplitsList.size() + " lengthPerGroup: " + lengthPerGroup + " numSplitsInGroup: "
                        + numSplitsInGroup);

                // dont do smallGroups for the first pass
                continue;
            }

            if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
                // a few nodes have a lot of data or data is thinly spread across nodes
                // so allow small groups now
                allowSmallGroups = true;
                LOG.info("Allowing small groups after iteration: " + iterations + " splitsProcessed: "
                        + splitsProcessed + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                        + groupedSplitsList.size());
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("Iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                        + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                        + groupedSplitsList.size());
            }
        }
        InputSplit[] groupedSplits = new InputSplit[groupedSplitsList.size()];
        groupedSplitsList.toArray(groupedSplits);
        LOG.info("Number of splits desired: " + desiredNumSplits + " created: " + groupedSplitsList.size()
                + " splitsProcessed: " + splitsProcessed);
        return groupedSplits;
    }

}