Example usage for org.apache.hadoop.conf.Configuration.getFloat

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getFloat.

Prototype

public float getFloat(String name, float defaultValue)

Source Link

Document

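Get the value of the name property as a float. If no such property exists, the provided default value is returned; if the configured value is not a valid float, a NumberFormatException is thrown.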

Usage

From source file:org.apache.tez.engine.common.shuffle.impl.MergeManager.java

License:Apache License

/**
 * Produces the final merged iterator over all fetched map outputs.
 *
 * <p>In-memory outputs that must vacate memory are first collected into segments. When the number
 * of on-disk outputs is below {@code ioSortFactor}, those segments are merged and written to a
 * single file that is appended to {@code onDiskMapOutputs}; otherwise they are kept and fed into
 * the on-disk merge. The on-disk merge result is then combined with the remaining in-memory
 * segments.
 *
 * @param job configuration; supplies the input buffer percent and the key/value/comparator classes
 * @param fs file system holding the on-disk map outputs
 * @param inMemoryMapOutputs map outputs currently held in memory
 * @param onDiskMapOutputs paths of map outputs on disk; a merged spill file may be appended
 * @return iterator over the merged key/value stream
 * @throws IOException if the configured input buffer percent is outside [0.0, 1.0], or if
 *         writing the spill file or reading file status fails
 */
private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, List<MapOutput> inMemoryMapOutputs,
        List<Path> onDiskMapOutputs) throws IOException {
    LOG.info("finalMerge called with " + inMemoryMapOutputs.size() + " in-memory map-outputs and "
            + onDiskMapOutputs.size() + " on-disk map-outputs");

    final float maxRedPer = job.getFloat(TezJobConfig.TEZ_ENGINE_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_INPUT_BUFFER_PERCENT);
    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
        // Name the offending key and value separately so the message is readable.
        throw new IOException(
                "Invalid value for " + TezJobConfig.TEZ_ENGINE_INPUT_BUFFER_PERCENT + ": " + maxRedPer);
    }
    // Fraction of the JVM max heap, capped at Integer.MAX_VALUE by the min() and the int cast.
    int maxInMemReduce = (int) Math.min(Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE);

    // merge config params
    Class keyClass = (Class) ConfigUtils.getIntermediateInputKeyClass(job);
    Class valueClass = (Class) ConfigUtils.getIntermediateInputValueClass(job);
    final Path tmpDir = new Path(taskAttemptId.toString());
    final RawComparator comparator = (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(job);

    // segments required to vacate memory
    List<Segment> memDiskSegments = new ArrayList<Segment>();
    long inMemToDiskBytes = 0;
    boolean mergePhaseFinished = false;
    if (inMemoryMapOutputs.size() > 0) {
        TezTaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
        inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, memDiskSegments, maxInMemReduce);
        final int numMemDiskSegments = memDiskSegments.size();
        if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) {

            // If we reach here, it implies that we have less than io.sort.factor
            // disk segments and this will be incremented by 1 (result of the
            // memory segments merge). Since this total would still be
            // <= io.sort.factor, we will not do any more intermediate merges,
            // the merge of all these disk segments would be directly fed to the
            // reduce method

            mergePhaseFinished = true;
            // must spill to disk, but can't retain in-mem for intermediate merge
            final Path outputPath = mapOutputFile.getInputFileForWrite(mapId, inMemToDiskBytes)
                    .suffix(Constants.MERGED_OUTPUT_PREFIX);
            final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass, memDiskSegments,
                    numMemDiskSegments, tmpDir, comparator, reporter, spilledRecordsCounter, null, mergePhase);
            final Writer writer = new Writer(job, fs, outputPath, keyClass, valueClass, codec, null);
            try {
                TezMerger.writeFile(rIter, writer, reporter, job);
                // add to list of final disk outputs.
                onDiskMapOutputs.add(outputPath);
            } catch (IOException e) {
                try {
                    fs.delete(outputPath, true);
                } catch (IOException ignored) {
                    // Best-effort cleanup of the partial spill file; the original failure is rethrown.
                }
                throw e;
            } finally {
                writer.close();
            }
            LOG.info("Merged " + numMemDiskSegments + " segments, " + inMemToDiskBytes
                    + " bytes to disk to satisfy " + "reduce memory limit");
            inMemToDiskBytes = 0;
            memDiskSegments.clear();
        } else if (inMemToDiskBytes != 0) {
            LOG.info("Keeping " + numMemDiskSegments + " segments, " + inMemToDiskBytes
                    + " bytes in memory for " + "intermediate, on-disk merge");
        }
    }

    // segments on disk
    List<Segment> diskSegments = new ArrayList<Segment>();
    long onDiskBytes = inMemToDiskBytes;
    Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]);
    for (Path file : onDisk) {
        // Query the file status once per file instead of once for the sum and again for the log.
        final long fileLength = fs.getFileStatus(file).getLen();
        onDiskBytes += fileLength;
        LOG.debug("Disk file: " + file + " Length is " + fileLength);
        // Files produced by the in-memory spill above carry the merged suffix and get no counter.
        diskSegments.add(new Segment(job, fs, file, codec, false,
                (file.toString().endsWith(Constants.MERGED_OUTPUT_PREFIX) ? null : mergedMapOutputsCounter)));
    }
    LOG.info("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk");
    // Order disk segments by ascending length.
    Collections.sort(diskSegments, new Comparator<Segment>() {
        public int compare(Segment o1, Segment o2) {
            if (o1.getLength() == o2.getLength()) {
                return 0;
            }
            return o1.getLength() < o2.getLength() ? -1 : 1;
        }
    });

    // build final list of segments from merged backed by disk + in-mem
    List<Segment> finalSegments = new ArrayList<Segment>();
    long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
    LOG.info("Merging " + finalSegments.size() + " segments, " + inMemBytes + " bytes from memory into reduce");
    if (0 != onDiskBytes) {
        final int numInMemSegments = memDiskSegments.size();
        diskSegments.addAll(0, memDiskSegments);
        memDiskSegments.clear();
        // Pass mergePhase only if there is a going to be intermediate
        // merges. See comment where mergePhaseFinished is being set
        Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
        TezRawKeyValueIterator diskMerge = TezMerger.merge(job, fs, keyClass, valueClass, diskSegments,
                ioSortFactor, numInMemSegments, tmpDir, comparator, reporter, false, spilledRecordsCounter,
                null, thisPhase);
        diskSegments.clear();
        if (0 == finalSegments.size()) {
            // Nothing left in memory: the disk merge is already the final result.
            return diskMerge;
        }
        finalSegments.add(new Segment(new RawKVIteratorReader(diskMerge, onDiskBytes), true));
    }
    return TezMerger.merge(job, fs, keyClass, valueClass, finalSegments, finalSegments.size(), tmpDir,
            comparator, reporter, spilledRecordsCounter, null, null);

}

From source file:org.apache.tez.mapreduce.common.MRInputAMSplitGenerator.java

License:Apache License

/**
 * Generates the input splits and returns the events that configure the vertex.
 *
 * <p>The configuration is rebuilt from the input user payload. The requested task count is
 * {@code totalResource * waves / taskResource}, and it is only passed on when split grouping is
 * enabled in the payload. The returned list starts with one
 * {@link InputConfigureVertexTasksEvent}, followed by one {@link InputDataInformationEvent} per
 * split, carrying either a serialized or an object payload depending on
 * {@code sendSerializedEvents}.
 *
 * @return the vertex configuration event followed by one data information event per split
 * @throws Exception if payload parsing, credential lookup or split generation fails
 */
@Override
public List<Event> initialize() throws Exception {
    // Sample the debug flag once: the stopwatch only exists when debug logging was enabled at
    // entry, so later timing code must not re-query a logger whose level may have changed.
    final boolean debugEnabled = LOG.isDebugEnabled();
    final Stopwatch sw = debugEnabled ? new Stopwatch().start() : null;

    MRInputUserPayloadProto userPayloadProto = MRInputHelpers
            .parseMRInputPayload(getContext().getInputUserPayload());
    if (debugEnabled) {
        sw.stop();
        LOG.debug("Time to parse MRInput payload into prot: " + sw.elapsedMillis());
        sw.reset().start();
    }
    Configuration conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());

    sendSerializedEvents = conf.getBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
            MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
    LOG.info("Emitting serialized splits: " + sendSerializedEvents);
    if (debugEnabled) {
        sw.stop();
        LOG.debug("Time converting ByteString to configuration: " + sw.elapsedMillis());
        sw.reset().start();
    }

    int totalResource = getContext().getTotalAvailableResource().getMemory();
    int taskResource = getContext().getVertexTaskResource().getMemory();
    float waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES,
            TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);

    // Float arithmetic, truncated to an int task count.
    int numTasks = (int) ((totalResource * waves) / taskResource);

    LOG.info("Input " + getContext().getInputName() + " asking for " + numTasks + " tasks. Headroom: "
            + totalResource + " Task Resource: " + taskResource + " waves: " + waves);

    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    InputSplitInfoMem inputSplitInfo = null;
    boolean groupSplits = userPayloadProto.getGroupingEnabled();
    if (groupSplits) {
        LOG.info("Grouping input splits");
        inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, true, numTasks);
    } else {
        inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, false, 0);
    }
    if (debugEnabled) {
        sw.stop();
        LOG.debug("Time to create splits to mem: " + sw.elapsedMillis());
    }

    // One slot per split plus one for the vertex configuration event.
    List<Event> events = Lists.newArrayListWithCapacity(inputSplitInfo.getNumTasks() + 1);

    InputConfigureVertexTasksEvent configureVertexEvent = InputConfigureVertexTasksEvent.create(
            inputSplitInfo.getNumTasks(), VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()),
            InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate());
    events.add(configureVertexEvent);

    if (sendSerializedEvents) {
        MRSplitsProto splitsProto = inputSplitInfo.getSplitsProto();
        int count = 0;
        for (MRSplitProto mrSplit : splitsProto.getSplitsList()) {
            // Serialize each split and pass it on as a read-only ByteBuffer payload.
            InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count++,
                    mrSplit.toByteString().asReadOnlyByteBuffer());
            events.add(diEvent);
        }
    } else {
        int count = 0;
        if (inputSplitInfo.holdsNewFormatSplits()) {
            for (org.apache.hadoop.mapreduce.InputSplit split : inputSplitInfo.getNewFormatSplits()) {
                InputDataInformationEvent diEvent = InputDataInformationEvent.createWithObjectPayload(count++,
                        split);
                events.add(diEvent);
            }
        } else {
            for (org.apache.hadoop.mapred.InputSplit split : inputSplitInfo.getOldFormatSplits()) {
                InputDataInformationEvent diEvent = InputDataInformationEvent.createWithObjectPayload(count++,
                        split);
                events.add(diEvent);
            }
        }
    }

    return events;
}

From source file:org.apache.tez.mapreduce.grouper.TezSplitGrouper.java

License:Apache License

/**
 * Groups the original splits into fewer, larger grouped splits, preferring node-local groups and
 * falling back to rack-local groups (or small groups) when full node-local groups cannot be formed.
 *
 * <p>The desired count is overridden by {@code TEZ_GROUPING_SPLIT_COUNT} when that is positive.
 * Otherwise it is adjusted so the average group length stays within the configured min/max group
 * sizes. If the resulting desired count is 0, there are no splits, or the desired count is not
 * smaller than the number of original splits, every original split is returned wrapped in its own
 * group.
 *
 * @param conf configuration supplying the grouping parameters
 * @param originalSplits splits to group; must not be null
 * @param desiredNumSplits requested number of groups; may be overridden as described above
 * @param wrappedInputFormatName name passed through to each created {@code GroupedSplitContainer}
 * @param estimator size estimator; {@code DEFAULT_SPLIT_ESTIMATOR} is used when null
 * @param locationProvider location provider; {@code DEFAULT_SPLIT_LOCATION_PROVIDER} is used when null
 * @return the grouped splits
 * @throws IOException declared; presumably propagated from the estimator or location provider - confirm
 * @throws InterruptedException declared; presumably propagated from the estimator or location provider - confirm
 */
public List<GroupedSplitContainer> getGroupedSplits(Configuration conf, List<SplitContainer> originalSplits,
        int desiredNumSplits, String wrappedInputFormatName, SplitSizeEstimatorWrapper estimator,
        SplitLocationProviderWrapper locationProvider) throws IOException, InterruptedException {
    LOG.info("Grouping splits in Tez");
    Preconditions.checkArgument(originalSplits != null, "Splits must be specified");

    int configNumSplits = conf.getInt(TEZ_GROUPING_SPLIT_COUNT, 0);
    if (configNumSplits > 0) {
        // always use config override if specified
        desiredNumSplits = configNumSplits;
        LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
    }

    if (estimator == null) {
        estimator = DEFAULT_SPLIT_ESTIMATOR;
    }
    if (locationProvider == null) {
        locationProvider = DEFAULT_SPLIT_LOCATION_PROVIDER;
    }

    List<GroupedSplitContainer> groupedSplits = null;
    // Sentinel location for splits without a location. Later checks compare against this exact
    // instance with ==, not equals().
    String emptyLocation = "EmptyLocation";
    String localhost = "localhost";
    String[] emptyLocations = { emptyLocation };
    groupedSplits = new ArrayList<GroupedSplitContainer>(desiredNumSplits);

    boolean allSplitsHaveLocalhost = true;

    long totalLength = 0;
    Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
    // go through splits and add them to locations
    // (first pass only registers the location keys and sums the estimated sizes; holders are
    // created further below)
    for (SplitContainer split : originalSplits) {
        totalLength += estimator.getEstimatedSize(split);
        String[] locations = locationProvider.getPreferredLocations(split);
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
            allSplitsHaveLocalhost = false;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
                allSplitsHaveLocalhost = false;
            }
            if (!location.equalsIgnoreCase(localhost)) {
                allSplitsHaveLocalhost = false;
            }
            distinctLocations.put(location, null);
        }
    }

    if (!(configNumSplits > 0 || originalSplits.size() == 0)) {
        // numSplits has not been overridden by config
        // numSplits has been set at runtime
        // there are splits generated
        // desired splits is less than number of splits generated
        // Do sanity checks

        int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.size();
        long lengthPerGroup = totalLength / splitCount;

        long maxLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MAX_SIZE, TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
        long minLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MIN_SIZE, TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
        if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
            throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
                    + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
        }
        if (lengthPerGroup > maxLengthPerGroup) {
            // splits too big to work. Need to override with max size.
            int newDesiredNumSplits = (int) (totalLength / maxLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too small. " + " Desired splitLength: "
                    + lengthPerGroup + " Max splitLength: " + maxLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                    + originalSplits.size());

            desiredNumSplits = newDesiredNumSplits;
        } else if (lengthPerGroup < minLengthPerGroup) {
            // splits too small to work. Need to override with size.
            int newDesiredNumSplits = (int) (totalLength / minLengthPerGroup) + 1;
            /**
             * This is a workaround for systems like S3 that pass the same
             * fake hostname for all splits.
             */
            if (!allSplitsHaveLocalhost) {
                desiredNumSplits = newDesiredNumSplits;
            }

            LOG.info("Desired splits: " + desiredNumSplits + " too large. " + " Desired splitLength: "
                    + lengthPerGroup + " Min splitLength: " + minLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Final desired splits: " + desiredNumSplits
                    + " All splits have localhost: " + allSplitsHaveLocalhost + " Total length: " + totalLength
                    + " Original splits: " + originalSplits.size());
        }
    }

    if (desiredNumSplits == 0 || originalSplits.size() == 0 || desiredNumSplits >= originalSplits.size()) {
        // nothing set. so return all the splits as is
        LOG.info("Using original number of splits: " + originalSplits.size() + " desired splits: "
                + desiredNumSplits);
        groupedSplits = new ArrayList<GroupedSplitContainer>(originalSplits.size());
        for (SplitContainer split : originalSplits) {
            GroupedSplitContainer newSplit = new GroupedSplitContainer(1, wrappedInputFormatName,
                    cleanupLocations(locationProvider.getPreferredLocations(split)), null);
            newSplit.addSplit(split);
            groupedSplits.add(newSplit);
        }
        return groupedSplits;
    }

    // Targets for a "full" group: by estimated length and by number of splits.
    long lengthPerGroup = totalLength / desiredNumSplits;
    int numNodeLocations = distinctLocations.size();
    int numSplitsPerLocation = originalSplits.size() / numNodeLocations;
    int numSplitsInGroup = originalSplits.size() / desiredNumSplits;

    // allocation loop here so that we have a good initial size for the lists
    for (String location : distinctLocations.keySet()) {
        distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
    }

    // Second pass: add each split to the holder of every distinct location it reports.
    Set<String> locSet = new HashSet<String>();
    for (SplitContainer split : originalSplits) {
        locSet.clear();
        String[] locations = locationProvider.getPreferredLocations(split);
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            locSet.add(location);
        }
        for (String location : locSet) {
            LocationHolder holder = distinctLocations.get(location);
            holder.splits.add(split);
        }
    }

    boolean groupByLength = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_LENGTH, TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
    boolean groupByCount = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_COUNT, TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
    boolean nodeLocalOnly = conf.getBoolean(TEZ_GROUPING_NODE_LOCAL_ONLY, TEZ_GROUPING_NODE_LOCAL_ONLY_DEFAULT);
    if (!(groupByLength || groupByCount)) {
        throw new TezUncheckedException("None of the grouping parameters are true: "
                + TEZ_GROUPING_SPLIT_BY_LENGTH + ", " + TEZ_GROUPING_SPLIT_BY_COUNT);
    }
    LOG.info("Desired numSplits: " + desiredNumSplits + " lengthPerGroup: " + lengthPerGroup + " numLocations: "
            + numNodeLocations + " numSplitsPerLocation: " + numSplitsPerLocation + " numSplitsInGroup: "
            + numSplitsInGroup + " totalLength: " + totalLength + " numOriginalSplits: " + originalSplits.size()
            + " . Grouping by length: " + groupByLength + " count: " + groupByCount + " nodeLocalOnly: "
            + nodeLocalOnly);

    // go through locations and group splits
    int splitsProcessed = 0;
    List<SplitContainer> group = new ArrayList<SplitContainer>(numSplitsInGroup);
    Set<String> groupLocationSet = new HashSet<String>(10);
    boolean allowSmallGroups = false;
    boolean doingRackLocal = false;
    int iterations = 0;
    // Each iteration tries to carve one group out of every location; loops until every split has
    // been placed in a group.
    while (splitsProcessed < originalSplits.size()) {
        iterations++;
        int numFullGroupsCreated = 0;
        for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
            group.clear();
            groupLocationSet.clear();
            String location = entry.getKey();
            LocationHolder holder = entry.getValue();
            SplitContainer splitContainer = holder.getUnprocessedHeadSplit();
            if (splitContainer == null) {
                // all splits on node processed
                continue;
            }
            // Remembered so the holder can be rewound if the group turns out too small.
            int oldHeadIndex = holder.headIndex;
            long groupLength = 0;
            int groupNumSplits = 0;
            // Always take at least one split, then keep adding while the enabled limits
            // (length and/or count) would not be exceeded by the next split.
            do {
                group.add(splitContainer);
                groupLength += estimator.getEstimatedSize(splitContainer);
                groupNumSplits++;
                holder.incrementHeadIndex();
                splitContainer = holder.getUnprocessedHeadSplit();
            } while (splitContainer != null
                    && (!groupByLength
                            || (groupLength + estimator.getEstimatedSize(splitContainer) <= lengthPerGroup))
                    && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

            if (holder.isEmpty() && !allowSmallGroups && (!groupByLength || groupLength < lengthPerGroup / 2)
                    && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
                // group too small, reset it
                holder.headIndex = oldHeadIndex;
                continue;
            }

            numFullGroupsCreated++;

            // One split group created
            String[] groupLocation = { location };
            // Identity comparison against the sentinel instance.
            // NOTE(review): relies on the map key being the same String reference - confirm.
            if (location == emptyLocation) {
                groupLocation = null;
            } else if (doingRackLocal) {
                // In the rack-local phase the key is a rack, so the group's locations are the
                // union of the member splits' own preferred locations.
                for (SplitContainer splitH : group) {
                    String[] locations = locationProvider.getPreferredLocations(splitH);
                    if (locations != null) {
                        for (String loc : locations) {
                            if (loc != null) {
                                groupLocationSet.add(loc);
                            }
                        }
                    }
                }
                groupLocation = groupLocationSet.toArray(groupLocation);
            }
            GroupedSplitContainer groupedSplit = new GroupedSplitContainer(group.size(), wrappedInputFormatName,
                    groupLocation,
                    // pass rack local hint directly to AM
                    ((doingRackLocal && location != emptyLocation) ? location : null));
            for (SplitContainer groupedSplitContainer : group) {
                groupedSplit.addSplit(groupedSplitContainer);
                Preconditions.checkState(groupedSplitContainer.isProcessed() == false,
                        "Duplicates in grouping at location: " + location);
                groupedSplitContainer.setIsProcessed(true);
                splitsProcessed++;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength() + " split at: "
                        + location);
            }
            groupedSplits.add(groupedSplit);
        }

        if (!doingRackLocal && numFullGroupsCreated < 1) {
            // no node could create a regular node-local group.

            // Allow small groups if that is configured.
            if (nodeLocalOnly && !allowSmallGroups) {
                LOG.info(
                        "Allowing small groups early after attempting to create full groups at iteration: {}, groupsCreatedSoFar={}",
                        iterations, groupedSplits.size());
                allowSmallGroups = true;
                continue;
            }

            // else go rack-local
            doingRackLocal = true;
            // re-create locations
            int numRemainingSplits = originalSplits.size() - splitsProcessed;
            Set<SplitContainer> remainingSplits = new HashSet<SplitContainer>(numRemainingSplits);
            // gather remaining splits.
            for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                LocationHolder locHolder = entry.getValue();
                while (!locHolder.isEmpty()) {
                    SplitContainer splitHolder = locHolder.getUnprocessedHeadSplit();
                    if (splitHolder != null) {
                        remainingSplits.add(splitHolder);
                        locHolder.incrementHeadIndex();
                    }
                }
            }
            // Consistency check: the set of drained splits must match the unprocessed count.
            if (remainingSplits.size() != numRemainingSplits) {
                throw new TezUncheckedException(
                        "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
            }

            // doing all this now instead of up front because the number of remaining
            // splits is expected to be much smaller
            RackResolver.init(conf);
            Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
            Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
            for (String location : distinctLocations.keySet()) {
                // The sentinel location is kept as its own "rack" and is never resolved.
                String rack = emptyLocation;
                if (location != emptyLocation) {
                    rack = RackResolver.resolve(location).getNetworkLocation();
                }
                locToRackMap.put(location, rack);
                if (rackLocations.get(rack) == null) {
                    // splits will probably be located in all racks
                    rackLocations.put(rack, new LocationHolder(numRemainingSplits));
                }
            }
            distinctLocations.clear();
            HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
            int numRackSplitsToGroup = remainingSplits.size();
            for (SplitContainer split : originalSplits) {
                if (numRackSplitsToGroup == 0) {
                    break;
                }
                // Iterate through the original splits in their order and consider them for grouping.
                // This maintains the original ordering in the list and thus subsequent grouping will
                // maintain that order
                if (!remainingSplits.contains(split)) {
                    continue;
                }
                numRackSplitsToGroup--;
                rackSet.clear();
                String[] locations = locationProvider.getPreferredLocations(split);
                if (locations == null || locations.length == 0) {
                    locations = emptyLocations;
                }
                for (String location : locations) {
                    if (location == null) {
                        location = emptyLocation;
                    }
                    rackSet.add(locToRackMap.get(location));
                }
                for (String rack : rackSet) {
                    rackLocations.get(rack).splits.add(split);
                }
            }

            remainingSplits.clear();
            // From here on the outer loop iterates over racks instead of nodes.
            distinctLocations = rackLocations;
            // adjust split length to be smaller because the data is non local
            float rackSplitReduction = conf.getFloat(TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
                    TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
            if (rackSplitReduction > 0) {
                long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
                int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
                // Only apply a scaled target if it did not truncate to zero.
                if (newLengthPerGroup > 0) {
                    lengthPerGroup = newLengthPerGroup;
                }
                if (newNumSplitsInGroup > 0) {
                    numSplitsInGroup = newNumSplitsInGroup;
                }
            }

            LOG.info("Doing rack local after iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: " + groupedSplits.size()
                    + " lengthPerGroup: " + lengthPerGroup + " numSplitsInGroup: " + numSplitsInGroup);

            // dont do smallGroups for the first pass
            continue;
        }

        if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
            // a few nodes have a lot of data or data is thinly spread across nodes
            // so allow small groups now
            allowSmallGroups = true;
            LOG.info("Allowing small groups after iteration: " + iterations + " splitsProcessed: "
                    + splitsProcessed + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplits.size());
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplits.size());
        }
    }
    LOG.info("Number of splits desired: " + desiredNumSplits + " created: " + groupedSplits.size()
            + " splitsProcessed: " + splitsProcessed);
    return groupedSplits;
}

From source file:org.apache.tez.runtime.library.broadcast.input.BroadcastInputManager.java

License:Apache License

/**
 * Creates the manager and derives its memory limits from the configuration.
 *
 * <p>The overall memory limit is the task memory (from {@code Constants.TEZ_RUNTIME_TASK_MEMORY},
 * defaulting to the JVM max memory capped at {@code Integer.MAX_VALUE}) multiplied by the shuffle
 * input buffer percent. The single-shuffle limit is that memory limit multiplied by the shuffle
 * memory limit percent.
 *
 * @param uniqueIdentifier identifier passed to the task output file allocator
 * @param conf configuration to read the limits from
 * @throws IllegalArgumentException if the input buffer percent is outside [0.0, 1.0] or the
 *         memory limit percent is outside (0.0, 1.0]
 */
public BroadcastInputManager(String uniqueIdentifier, Configuration conf) {
    this.conf = conf;

    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS);

    // Share of the task memory that shuffled input may occupy.
    final float inputBufferPercent = conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT);
    if (inputBufferPercent < 0.0 || inputBufferPercent > 1.0) {
        final String message = "Invalid value for "
                + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + inputBufferPercent;
        throw new IllegalArgumentException(message);
    }

    // The task memory is configurable so that unit tests can pin it instead of using the JVM value.
    final long defaultTaskMemory = Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE);
    final long taskMemory = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, defaultTaskMemory);
    this.memoryLimit = (long) (taskMemory * inputBufferPercent);

    // Share of the memory limit that one shuffled input may occupy.
    final float singleLimitPercent = conf.getFloat(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleLimitPercent > 1.0f || singleLimitPercent <= 0.0f) {
        final String message = "Invalid value for "
                + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleLimitPercent;
        throw new IllegalArgumentException(message);
    }

    this.maxSingleShuffleLimit = (long) (memoryLimit * singleLimitPercent);

    LOG.info("BroadcastInputManager -> " + "MemoryLimit: " + this.memoryLimit
            + ", maxSingleMemLimit: " + this.maxSingleShuffleLimit);
}

From source file:org.apache.tez.runtime.library.common.shuffle.impl.MergeManager.java

License:Apache License

/**
 * Creates the merge manager, reads its tuning parameters from {@code conf} and starts the
 * configured mergers before returning.
 *
 * @param conf configuration the shuffle/merge settings are read from
 * @param localFS local file system; it is cast to {@code LocalFileSystem} to obtain the raw file system
 * @param localDirAllocator allocator stored for later use by this manager
 * @param inputContext input context; its unique identifier names the task output files
 * @param combiner combiner stored for later use by this manager
 * @param spilledRecordsCounter counter stored for later use by this manager
 * @param reduceCombineInputCounter counter stored for later use by this manager
 * @param mergedMapOutputsCounter counter stored for later use by this manager
 * @param exceptionReporter reporter stored for later use by this manager
 * @throws IllegalArgumentException if the input buffer percent is outside [0, 1] or the
 *         single shuffle memory limit percent is outside (0, 1]
 * @throws RuntimeException if the single shuffle limit is not strictly below the merge threshold
 */
public MergeManager(Configuration conf, FileSystem localFS, LocalDirAllocator localDirAllocator,
        TezInputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter,
        TezCounter reduceCombineInputCounter, TezCounter mergedMapOutputsCounter,
        ExceptionReporter exceptionReporter) {
    this.inputContext = inputContext;
    this.conf = conf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;

    this.combiner = combiner;

    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = new TezTaskOutputFiles(conf, inputContext.getUniqueIdentifier());

    this.localFS = localFS;
    this.rfs = ((LocalFileSystem) localFS).getRaw();

    // Only instantiate a codec when intermediate input is compressed.
    if (ConfigUtils.isIntermediateInputCompressed(conf)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
    } else {
        codec = null;
    }
    this.ifileReadAhead = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
            TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
    if (this.ifileReadAhead) {
        this.ifileReadAheadLength = conf.getInt(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
                TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
    } else {
        this.ifileReadAheadLength = 0;
    }
    this.ifileBufferSize = conf.getInt("io.file.buffer.size",
            TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

    final float maxInMemCopyUse = conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse);

    this.ioSortFactor = conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);
    // The mem-to-mem segment threshold defaults to the sort factor when not configured.
    this.memToMemMergeOutputsThreshold = conf.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS,
            ioSortFactor);
    this.mergeThreshold = (long) (this.memoryLimit
            * conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT,
                    TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT));
    LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
            + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
            + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
        // Message previously had a typo and ran its fragments together without separators.
        throw new RuntimeException("Invalid configuration: "
                + "maxSingleShuffleLimit should be less than mergeThreshold. maxSingleShuffleLimit: "
                + this.maxSingleShuffleLimit + ", mergeThreshold: " + this.mergeThreshold);
    }

    boolean allowMemToMemMerge = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM);
    if (allowMemToMemMerge) {
        this.memToMemMerger = new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.start();
    } else {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = new InMemoryMerger(this);
    this.inMemoryMerger.start();

    this.onDiskMerger = new OnDiskMerger(this);
    this.onDiskMerger.start();
}

From source file:org.apache.tez.runtime.library.common.shuffle.impl.MergeManager.java

License:Apache License

/**
 * Produces the final merged iterator over the given in-memory and on-disk map outputs.
 *
 * <p>In-memory outputs that must be vacated are either merged into a new on-disk file (when
 * the number of on-disk outputs is below the sort factor) or kept as segments that take part
 * in the on-disk merge. The remaining in-memory outputs and the result of the on-disk merge
 * are then merged into the returned iterator.
 *
 * @param job configuration used for the merge and for reading the input buffer percent
 * @param fs file system holding the on-disk map outputs
 * @param inMemoryMapOutputs map outputs currently held in memory
 * @param onDiskMapOutputs paths of map outputs on disk; a merged spill file may be appended
 * @return iterator over the fully merged key/value stream
 * @throws IOException if the configured input buffer percent is outside [0, 1] or an
 *         underlying file system or merge operation fails
 */
private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, List<MapOutput> inMemoryMapOutputs,
        List<Path> onDiskMapOutputs) throws IOException {
    LOG.info("finalMerge called with " + inMemoryMapOutputs.size() + " in-memory map-outputs and "
            + onDiskMapOutputs.size() + " on-disk map-outputs");

    final float maxRedPer = job.getFloat(TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_INPUT_BUFFER_PERCENT);
    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
        // Message previously concatenated key and value with no separator or context.
        throw new IOException("Invalid value for " + TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT + ": "
                + maxRedPer);
    }
    int maxInMemReduce = (int) Math.min(Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE);
    LOG.info("Memory allocated for final merge output: " + maxInMemReduce + ", using factor: " + maxRedPer);

    // merge config params
    Class keyClass = (Class) ConfigUtils.getIntermediateInputKeyClass(job);
    Class valueClass = (Class) ConfigUtils.getIntermediateInputValueClass(job);
    final Path tmpDir = new Path(inputContext.getUniqueIdentifier());
    final RawComparator comparator = (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(job);

    // segments required to vacate memory
    List<Segment> memDiskSegments = new ArrayList<Segment>();
    long inMemToDiskBytes = 0;
    boolean mergePhaseFinished = false;
    if (inMemoryMapOutputs.size() > 0) {
        int srcTaskId = inMemoryMapOutputs.get(0).getAttemptIdentifier().getInputIdentifier().getSrcTaskIndex();
        inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, memDiskSegments, maxInMemReduce);
        final int numMemDiskSegments = memDiskSegments.size();
        if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) {

            // If we reach here, it implies that we have less than io.sort.factor
            // disk segments and this will be incremented by 1 (result of the
            // memory segments merge). Since this total would still be
            // <= io.sort.factor, we will not do any more intermediate merges,
            // the merge of all these disk segments would be directly fed to the
            // reduce method

            mergePhaseFinished = true;
            // must spill to disk, but can't retain in-mem for intermediate merge
            final Path outputPath = mapOutputFile.getInputFileForWrite(srcTaskId, inMemToDiskBytes)
                    .suffix(Constants.MERGED_OUTPUT_PREFIX);
            final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass, memDiskSegments,
                    numMemDiskSegments, tmpDir, comparator, nullProgressable, spilledRecordsCounter, null,
                    null);
            final Writer writer = new Writer(job, fs, outputPath, keyClass, valueClass, codec, null);
            try {
                TezMerger.writeFile(rIter, writer, nullProgressable,
                        TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
                // add to list of final disk outputs.
                onDiskMapOutputs.add(outputPath);
            } catch (IOException e) {
                try {
                    fs.delete(outputPath, true);
                } catch (IOException ignored) {
                    // Best-effort cleanup of the partial spill file; the original failure is rethrown below.
                }
                throw e;
            } finally {
                writer.close();
            }
            LOG.info("Merged " + numMemDiskSegments + " segments, " + inMemToDiskBytes
                    + " bytes to disk to satisfy " + "reduce memory limit");
            inMemToDiskBytes = 0;
            memDiskSegments.clear();
        } else if (inMemToDiskBytes != 0) {
            LOG.info("Keeping " + numMemDiskSegments + " segments, " + inMemToDiskBytes
                    + " bytes in memory for " + "intermediate, on-disk merge");
        }
    }

    // segments on disk
    List<Segment> diskSegments = new ArrayList<Segment>();
    long onDiskBytes = inMemToDiskBytes;
    Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]);
    for (Path file : onDisk) {
        // Stat each file once so the accounted and the logged length always agree.
        final long fileLength = fs.getFileStatus(file).getLen();
        onDiskBytes += fileLength;
        LOG.debug("Disk file: " + file + " Length is " + fileLength);
        // Files produced by the in-memory spill above carry the merged suffix and get no merged-outputs counter.
        diskSegments.add(new Segment(job, fs, file, codec, ifileReadAhead, ifileReadAheadLength,
                ifileBufferSize, false,
                (file.toString().endsWith(Constants.MERGED_OUTPUT_PREFIX) ? null : mergedMapOutputsCounter)));
    }
    LOG.info("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk");
    // Order disk segments by ascending length.
    Collections.sort(diskSegments, new Comparator<Segment>() {
        public int compare(Segment o1, Segment o2) {
            if (o1.getLength() == o2.getLength()) {
                return 0;
            }
            return o1.getLength() < o2.getLength() ? -1 : 1;
        }
    });

    // build final list of segments from merged backed by disk + in-mem
    List<Segment> finalSegments = new ArrayList<Segment>();
    long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
    LOG.info("Merging " + finalSegments.size() + " segments, " + inMemBytes + " bytes from memory into reduce");
    if (0 != onDiskBytes) {
        final int numInMemSegments = memDiskSegments.size();
        // Retained in-memory segments go first so the merger is told how many leading segments are in memory.
        diskSegments.addAll(0, memDiskSegments);
        memDiskSegments.clear();
        TezRawKeyValueIterator diskMerge = TezMerger.merge(job, fs, keyClass, valueClass, diskSegments,
                ioSortFactor, numInMemSegments, tmpDir, comparator, nullProgressable, false,
                spilledRecordsCounter, null, null);
        diskSegments.clear();
        if (0 == finalSegments.size()) {
            return diskMerge;
        }
        finalSegments.add(new Segment(new RawKVIteratorReader(diskMerge, onDiskBytes), true));
    }
    return TezMerger.merge(job, fs, keyClass, valueClass, finalSegments, finalSegments.size(), tmpDir,
            comparator, nullProgressable, spilledRecordsCounter, null, null);

}

From source file:org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java

License:Apache License

/**
 * Creates the allocator and derives its memory limits from the configuration and the memory
 * actually made available to it.
 *
 * @param uniqueIdentifier identifier handed to the task output file name allocator
 * @param conf configuration the fetch buffer and single-shuffle percentages are read from
 * @param maxTaskAvailableMemory maximum memory available to the task
 * @param memoryAvailable memory granted to this allocator; the memory limit never exceeds it
 * @throws IllegalArgumentException if the fetch buffer percent is outside [0, 1] or the
 *         single shuffle memory limit percent is outside (0, 1]
 */
public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory,
        long memoryAvailable) {
    this.conf = conf;
    this.maxAvailableTaskMemory = maxTaskAvailableMemory;
    this.initialMemoryAvailable = memoryAvailable;
    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Fraction of the task memory requested for holding fetched inputs.
    final float fetchBufferFraction = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (fetchBufferFraction < 0.0 || fetchBufferFraction > 1.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + fetchBufferFraction);
    }

    // Task memory may be overridden through configuration; the default is capped at Integer.MAX_VALUE.
    final long taskMemory = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE));
    final long requestedMemory = (long) (taskMemory * fetchBufferFraction);

    // Never claim more than what was actually granted.
    this.memoryLimit = Math.min(requestedMemory, this.initialMemoryAvailable);

    LOG.info("RequestedMem=" + requestedMemory + ", Allocated: " + this.memoryLimit);

    // Fraction of the memory limit that a single fetched input may occupy.
    final float singleInputFraction = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
    if (singleInputFraction > 1.0f || singleInputFraction <= 0.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleInputFraction);
    }

    //TODO: cap it to MAX_VALUE until MemoryFetchedInput can handle > 2 GB
    this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleInputFraction),
            Integer.MAX_VALUE);

    LOG.info("SimpleInputManager -> MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
            + this.maxSingleShuffleLimit);
}

From source file:org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java

License:Apache License

/**
 * Computes the memory this allocator would initially request, without creating an instance.
 *
 * @param conf configuration the fetch buffer percent (and optional task memory override) is read from
 * @param maxAvailableTaskMemory maximum memory available to the task; used, capped at
 *        {@code Integer.MAX_VALUE}, when no task memory override is configured
 * @return the task memory multiplied by the configured fetch buffer percent
 * @throws IllegalArgumentException if the fetch buffer percent is outside [0, 1]
 */
@Private
public static long getInitialMemoryReq(Configuration conf, long maxAvailableTaskMemory) {
    final float fetchBufferFraction = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (fetchBufferFraction < 0.0 || fetchBufferFraction > 1.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + fetchBufferFraction);
    }

    final long taskMemory = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE));
    return (long) (taskMemory * fetchBufferFraction);
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java

License:Apache License

/**
 * Construct the MergeManager. Must call start before it becomes usable.
 *
 * <p>Reads the shuffle and merge tuning parameters from {@code conf}, bounds the shuffle and
 * post-merge memory limits by {@code initialMemoryAvailable}, and creates (but does not start)
 * the mergers.
 *
 * @param conf configuration the shuffle/merge settings are read from
 * @param localFS local file system; it is cast to {@code LocalFileSystem} to obtain the raw file system
 * @param localDirAllocator allocator stored for later use by this manager
 * @param inputContext input context supplying the unique identifier, counters and total task memory
 * @param combiner combiner stored for later use by this manager
 * @param spilledRecordsCounter counter stored for later use by this manager
 * @param reduceCombineInputCounter counter stored for later use by this manager
 * @param mergedMapOutputsCounter counter stored for later use by this manager
 * @param exceptionReporter reporter stored for later use by this manager
 * @param initialMemoryAvailable memory granted to this manager; upper bound for both memory limits
 * @param codec compression codec stored for later use by this manager
 * @param ifileReadAheadEnabled whether read-ahead is enabled
 * @param ifileReadAheadLength read-ahead length; forced to 0 when read-ahead is disabled
 * @throws IllegalArgumentException if the fetch buffer percent is outside [0, 1] or the
 *         single shuffle memory limit percent is outside (0, 1]
 * @throws TezUncheckedException if the post-merge buffer percent is outside [0, 1]
 * @throws RuntimeException if the single shuffle limit is not strictly below the merge threshold
 */
public MergeManager(Configuration conf, FileSystem localFS, LocalDirAllocator localDirAllocator,
        InputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter,
        TezCounter reduceCombineInputCounter, TezCounter mergedMapOutputsCounter,
        ExceptionReporter exceptionReporter, long initialMemoryAvailable, CompressionCodec codec,
        boolean ifileReadAheadEnabled, int ifileReadAheadLength) {
    this.inputContext = inputContext;
    this.conf = conf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;
    this.initialMemoryAvailable = initialMemoryAvailable;

    this.combiner = combiner;

    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = new TezTaskOutputFiles(conf, inputContext.getUniqueIdentifier());

    this.localFS = localFS;
    this.rfs = ((LocalFileSystem) localFS).getRaw();

    this.numDiskToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_DISK_TO_DISK_MERGES);
    this.numMemToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_MEM_TO_DISK_MERGES);
    this.additionalBytesWritten = inputContext.getCounters()
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    this.additionalBytesRead = inputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);

    this.codec = codec;
    this.ifileReadAhead = ifileReadAheadEnabled;
    if (this.ifileReadAhead) {
        this.ifileReadAheadLength = ifileReadAheadLength;
    } else {
        this.ifileReadAheadLength = 0;
    }
    this.ifileBufferSize = conf.getInt("io.file.buffer.size",
            TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

    // Figure out initial memory req start
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            (long) (inputContext.getTotalMemoryAvailableToTask() * maxInMemCopyUse));

    float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT);
    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
        // Message previously concatenated key and value with no separator or context.
        throw new TezUncheckedException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + ": " + maxRedPer);
    }

    long maxRedBuffer = (long) (inputContext.getTotalMemoryAvailableToTask() * maxRedPer);
    // Figure out initial memory req end

    // Neither limit may exceed the memory that was actually granted.
    this.memoryLimit = Math.min(this.initialMemoryAvailable, memLimit);
    this.postMergeMemLimit = Math.min(this.initialMemoryAvailable, maxRedBuffer);

    LOG.info("InitialRequest: ShuffleMem=" + memLimit + ", postMergeMem=" + maxRedBuffer
            + ", RuntimeTotalAvailable=" + this.initialMemoryAvailable + ". Updated to: ShuffleMem="
            + this.memoryLimit + ", postMergeMem: " + this.postMergeMemLimit);

    this.ioSortFactor = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR,
            TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    //TODO: Cap it to MAX_VALUE until MapOutput starts supporting > 2 GB
    this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent),
            Integer.MAX_VALUE);
    // The mem-to-mem segment threshold defaults to the sort factor when not configured.
    this.memToMemMergeOutputsThreshold = conf
            .getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, ioSortFactor);
    this.mergeThreshold = (long) (this.memoryLimit
            * conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT,
                    TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT_DEFAULT));
    LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
            + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
            + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
        // Message previously had a typo and a missing separator after "mergeThreshold".
        throw new RuntimeException("Invalid configuration: "
                + "maxSingleShuffleLimit should be less than mergeThreshold. maxSingleShuffleLimit: "
                + this.maxSingleShuffleLimit + ", mergeThreshold: " + this.mergeThreshold);
    }

    boolean allowMemToMemMerge = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM_DEFAULT);
    if (allowMemToMemMerge) {
        this.memToMemMerger = new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
    } else {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = new InMemoryMerger(this);

    this.onDiskMerger = new OnDiskMerger(this);
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java

License:Apache License

/**
 * Exposing this to get an initial memory ask without instantiating the object.
 *
 * @param conf configuration the fetch buffer and post-merge buffer percentages are read from
 * @param maxAvailableTaskMemory maximum memory available to the task
 * @return the larger of the shuffle memory requirement and the post-merge buffer requirement
 * @throws IllegalArgumentException if the fetch buffer percent is outside [0, 1]
 * @throws TezUncheckedException if the post-merge buffer percent is outside [0, 1]
 */
@Private
static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) {
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            (long) (maxAvailableTaskMemory * maxInMemCopyUse));

    LOG.info("Initial Shuffle Memory Required: " + memLimit + ", based on INPUT_BUFFER_factor: "
            + maxInMemCopyUse);

    float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT);
    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
        // Message previously concatenated key and value with no separator or context.
        throw new TezUncheckedException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + ": " + maxRedPer);
    }
    long maxRedBuffer = (long) (maxAvailableTaskMemory * maxRedPer);

    LOG.info("Initial Memory required for final merged output: " + maxRedBuffer + ", using factor: "
            + maxRedPer);

    // Both phases must fit, so ask for whichever needs more.
    return Math.max(maxRedBuffer, memLimit);
}