List of usage examples for org.apache.hadoop.conf Configuration getFloat
public float getFloat(String name, float defaultValue)
Gets the value of the name property as a float; defaultValue is returned when the property is not set.
From source file: org.apache.tez.engine.common.shuffle.impl.MergeManager.java
License:Apache License
private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, List<MapOutput> inMemoryMapOutputs, List<Path> onDiskMapOutputs) throws IOException { LOG.info("finalMerge called with " + inMemoryMapOutputs.size() + " in-memory map-outputs and " + onDiskMapOutputs.size() + " on-disk map-outputs"); final float maxRedPer = job.getFloat(TezJobConfig.TEZ_ENGINE_INPUT_BUFFER_PERCENT, TezJobConfig.DEFAULT_TEZ_ENGINE_INPUT_BUFFER_PERCENT); if (maxRedPer > 1.0 || maxRedPer < 0.0) { throw new IOException(TezJobConfig.TEZ_ENGINE_INPUT_BUFFER_PERCENT + maxRedPer); }//w w w . j a va 2 s . c o m int maxInMemReduce = (int) Math.min(Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE); // merge config params Class keyClass = (Class) ConfigUtils.getIntermediateInputKeyClass(job); Class valueClass = (Class) ConfigUtils.getIntermediateInputValueClass(job); final Path tmpDir = new Path(taskAttemptId.toString()); final RawComparator comparator = (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(job); // segments required to vacate memory List<Segment> memDiskSegments = new ArrayList<Segment>(); long inMemToDiskBytes = 0; boolean mergePhaseFinished = false; if (inMemoryMapOutputs.size() > 0) { TezTaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID(); inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, memDiskSegments, maxInMemReduce); final int numMemDiskSegments = memDiskSegments.size(); if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) { // If we reach here, it implies that we have less than io.sort.factor // disk segments and this will be incremented by 1 (result of the // memory segments merge). 
Since this total would still be // <= io.sort.factor, we will not do any more intermediate merges, // the merge of all these disk segments would be directly fed to the // reduce method mergePhaseFinished = true; // must spill to disk, but can't retain in-mem for intermediate merge final Path outputPath = mapOutputFile.getInputFileForWrite(mapId, inMemToDiskBytes) .suffix(Constants.MERGED_OUTPUT_PREFIX); final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass, memDiskSegments, numMemDiskSegments, tmpDir, comparator, reporter, spilledRecordsCounter, null, mergePhase); final Writer writer = new Writer(job, fs, outputPath, keyClass, valueClass, codec, null); try { TezMerger.writeFile(rIter, writer, reporter, job); // add to list of final disk outputs. onDiskMapOutputs.add(outputPath); } catch (IOException e) { if (null != outputPath) { try { fs.delete(outputPath, true); } catch (IOException ie) { // NOTHING } } throw e; } finally { if (null != writer) { writer.close(); } } LOG.info("Merged " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes to disk to satisfy " + "reduce memory limit"); inMemToDiskBytes = 0; memDiskSegments.clear(); } else if (inMemToDiskBytes != 0) { LOG.info("Keeping " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes in memory for " + "intermediate, on-disk merge"); } } // segments on disk List<Segment> diskSegments = new ArrayList<Segment>(); long onDiskBytes = inMemToDiskBytes; Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]); for (Path file : onDisk) { onDiskBytes += fs.getFileStatus(file).getLen(); LOG.debug("Disk file: " + file + " Length is " + fs.getFileStatus(file).getLen()); diskSegments.add(new Segment(job, fs, file, codec, false, (file.toString().endsWith(Constants.MERGED_OUTPUT_PREFIX) ? 
null : mergedMapOutputsCounter))); } LOG.info("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk"); Collections.sort(diskSegments, new Comparator<Segment>() { public int compare(Segment o1, Segment o2) { if (o1.getLength() == o2.getLength()) { return 0; } return o1.getLength() < o2.getLength() ? -1 : 1; } }); // build final list of segments from merged backed by disk + in-mem List<Segment> finalSegments = new ArrayList<Segment>(); long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0); LOG.info("Merging " + finalSegments.size() + " segments, " + inMemBytes + " bytes from memory into reduce"); if (0 != onDiskBytes) { final int numInMemSegments = memDiskSegments.size(); diskSegments.addAll(0, memDiskSegments); memDiskSegments.clear(); // Pass mergePhase only if there is a going to be intermediate // merges. See comment where mergePhaseFinished is being set Progress thisPhase = (mergePhaseFinished) ? null : mergePhase; TezRawKeyValueIterator diskMerge = TezMerger.merge(job, fs, keyClass, valueClass, diskSegments, ioSortFactor, numInMemSegments, tmpDir, comparator, reporter, false, spilledRecordsCounter, null, thisPhase); diskSegments.clear(); if (0 == finalSegments.size()) { return diskMerge; } finalSegments.add(new Segment(new RawKVIteratorReader(diskMerge, onDiskBytes), true)); } return TezMerger.merge(job, fs, keyClass, valueClass, finalSegments, finalSegments.size(), tmpDir, comparator, reporter, spilledRecordsCounter, null, null); }
From source file:org.apache.tez.mapreduce.common.MRInputAMSplitGenerator.java
License:Apache License
@Override public List<Event> initialize() throws Exception { Stopwatch sw = null;//from w w w . j av a2s . co m if (LOG.isDebugEnabled()) { sw = new Stopwatch().start(); } MRInputUserPayloadProto userPayloadProto = MRInputHelpers .parseMRInputPayload(getContext().getInputUserPayload()); if (LOG.isDebugEnabled()) { sw.stop(); LOG.debug("Time to parse MRInput payload into prot: " + sw.elapsedMillis()); } if (LOG.isDebugEnabled()) { sw.reset().start(); } Configuration conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes()); sendSerializedEvents = conf.getBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT); LOG.info("Emitting serialized splits: " + sendSerializedEvents); if (LOG.isDebugEnabled()) { sw.stop(); LOG.debug("Time converting ByteString to configuration: " + sw.elapsedMillis()); } if (LOG.isDebugEnabled()) { sw.reset().start(); } int totalResource = getContext().getTotalAvailableResource().getMemory(); int taskResource = getContext().getVertexTaskResource().getMemory(); float waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT); int numTasks = (int) ((totalResource * waves) / taskResource); LOG.info("Input " + getContext().getInputName() + " asking for " + numTasks + " tasks. Headroom: " + totalResource + " Task Resource: " + taskResource + " waves: " + waves); // Read all credentials into the credentials instance stored in JobConf. 
JobConf jobConf = new JobConf(conf); jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials()); InputSplitInfoMem inputSplitInfo = null; boolean groupSplits = userPayloadProto.getGroupingEnabled(); if (groupSplits) { LOG.info("Grouping input splits"); inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, true, numTasks); } else { inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, false, 0); } if (LOG.isDebugEnabled()) { sw.stop(); LOG.debug("Time to create splits to mem: " + sw.elapsedMillis()); } List<Event> events = Lists.newArrayListWithCapacity(inputSplitInfo.getNumTasks() + 1); InputConfigureVertexTasksEvent configureVertexEvent = InputConfigureVertexTasksEvent.create( inputSplitInfo.getNumTasks(), VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate()); events.add(configureVertexEvent); if (sendSerializedEvents) { MRSplitsProto splitsProto = inputSplitInfo.getSplitsProto(); int count = 0; for (MRSplitProto mrSplit : splitsProto.getSplitsList()) { // Unnecessary array copy, can be avoided by using ByteBuffer instead of a raw array. InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count++, mrSplit.toByteString().asReadOnlyByteBuffer()); events.add(diEvent); } } else { int count = 0; if (inputSplitInfo.holdsNewFormatSplits()) { for (org.apache.hadoop.mapreduce.InputSplit split : inputSplitInfo.getNewFormatSplits()) { InputDataInformationEvent diEvent = InputDataInformationEvent.createWithObjectPayload(count++, split); events.add(diEvent); } } else { for (org.apache.hadoop.mapred.InputSplit split : inputSplitInfo.getOldFormatSplits()) { InputDataInformationEvent diEvent = InputDataInformationEvent.createWithObjectPayload(count++, split); events.add(diEvent); } } } return events; }
From source file:org.apache.tez.mapreduce.grouper.TezSplitGrouper.java
License:Apache License
/**
 * Groups the given splits into fewer, larger {@link GroupedSplitContainer}s,
 * keyed by the locations reported by {@code locationProvider}.
 *
 * <p>Groups are first formed per location. When a full pass over all locations
 * creates no group, the method either starts allowing small groups (if
 * {@code TEZ_GROUPING_NODE_LOCAL_ONLY} is set) or re-buckets the remaining
 * splits by rack and continues there with a reduced group size.
 *
 * @param conf configuration supplying the grouping settings read below (split
 *        count override, min/max group size, group-by-length/count flags,
 *        node-local-only flag, rack size reduction)
 * @param originalSplits splits to group; must not be null
 * @param desiredNumSplits requested number of groups; replaced by
 *        {@code TEZ_GROUPING_SPLIT_COUNT} when that setting is positive, and
 *        otherwise adjusted so the per-group length stays within min/max
 * @param wrappedInputFormatName passed through to every created container
 * @param estimator size estimator; {@code DEFAULT_SPLIT_ESTIMATOR} when null
 * @param locationProvider location source; {@code DEFAULT_SPLIT_LOCATION_PROVIDER} when null
 * @return the grouped splits; when {@code desiredNumSplits} ends up 0, there are
 *         no splits, or it is not smaller than the number of original splits,
 *         each original split is returned in a container of its own
 * @throws TezUncheckedException if the min/max group lengths are invalid,
 *         neither grouping flag is enabled, or the remaining-split count does
 *         not match when switching to rack-local grouping
 */
public List<GroupedSplitContainer> getGroupedSplits(Configuration conf, List<SplitContainer> originalSplits,
    int desiredNumSplits, String wrappedInputFormatName, SplitSizeEstimatorWrapper estimator,
    SplitLocationProviderWrapper locationProvider) throws IOException, InterruptedException {
  LOG.info("Grouping splits in Tez");
  Preconditions.checkArgument(originalSplits != null, "Splits must be specified");

  int configNumSplits = conf.getInt(TEZ_GROUPING_SPLIT_COUNT, 0);
  if (configNumSplits > 0) {
    // always use config override if specified
    desiredNumSplits = configNumSplits;
    LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
  }

  if (estimator == null) {
    estimator = DEFAULT_SPLIT_ESTIMATOR;
  }
  if (locationProvider == null) {
    locationProvider = DEFAULT_SPLIT_LOCATION_PROVIDER;
  }

  List<GroupedSplitContainer> groupedSplits = null;
  // Sentinel used for splits/locations without a usable location. It is later
  // compared by reference (==), so it must stay this exact String instance.
  String emptyLocation = "EmptyLocation";
  String localhost = "localhost";
  String[] emptyLocations = { emptyLocation };
  groupedSplits = new ArrayList<GroupedSplitContainer>(desiredNumSplits);

  boolean allSplitsHaveLocalhost = true;

  long totalLength = 0;
  Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
  // go through splits and add them to locations
  // (only the keys are registered here; holders are allocated further down
  // once the expected number of splits per location is known)
  for (SplitContainer split : originalSplits) {
    totalLength += estimator.getEstimatedSize(split);
    String[] locations = locationProvider.getPreferredLocations(split);
    if (locations == null || locations.length == 0) {
      locations = emptyLocations;
      allSplitsHaveLocalhost = false;
    }
    for (String location : locations) {
      if (location == null) {
        location = emptyLocation;
        allSplitsHaveLocalhost = false;
      }
      if (!location.equalsIgnoreCase(localhost)) {
        allSplitsHaveLocalhost = false;
      }
      distinctLocations.put(location, null);
    }
  }

  if (!(configNumSplits > 0 || originalSplits.size() == 0)) {
    // numSplits has not been overridden by config
    // numSplits has been set at runtime
    // there are splits generated
    // desired splits is less than number of splits generated
    // Do sanity checks

    int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.size();
    long lengthPerGroup = totalLength / splitCount;

    long maxLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MAX_SIZE, TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
    long minLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MIN_SIZE, TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
    if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
      throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
          + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
    }
    if (lengthPerGroup > maxLengthPerGroup) {
      // splits too big to work. Need to override with max size.
      int newDesiredNumSplits = (int) (totalLength / maxLengthPerGroup) + 1;
      LOG.info("Desired splits: " + desiredNumSplits + " too small. " + " Desired splitLength: "
          + lengthPerGroup + " Max splitLength: " + maxLengthPerGroup + " New desired splits: "
          + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
          + originalSplits.size());

      desiredNumSplits = newDesiredNumSplits;
    } else if (lengthPerGroup < minLengthPerGroup) {
      // splits too small to work. Need to override with size.
      int newDesiredNumSplits = (int) (totalLength / minLengthPerGroup) + 1;
      /**
       * This is a workaround for systems like S3 that pass the same
       * fake hostname for all splits.
       */
      if (!allSplitsHaveLocalhost) {
        desiredNumSplits = newDesiredNumSplits;
      }

      LOG.info("Desired splits: " + desiredNumSplits + " too large. " + " Desired splitLength: "
          + lengthPerGroup + " Min splitLength: " + minLengthPerGroup + " New desired splits: "
          + newDesiredNumSplits + " Final desired splits: " + desiredNumSplits
          + " All splits have localhost: " + allSplitsHaveLocalhost + " Total length: " + totalLength
          + " Original splits: " + originalSplits.size());
    }
  }

  if (desiredNumSplits == 0 || originalSplits.size() == 0 || desiredNumSplits >= originalSplits.size()) {
    // nothing set. so return all the splits as is
    LOG.info("Using original number of splits: " + originalSplits.size() + " desired splits: "
        + desiredNumSplits);
    groupedSplits = new ArrayList<GroupedSplitContainer>(originalSplits.size());
    for (SplitContainer split : originalSplits) {
      GroupedSplitContainer newSplit = new GroupedSplitContainer(1, wrappedInputFormatName,
          cleanupLocations(locationProvider.getPreferredLocations(split)), null);
      newSplit.addSplit(split);
      groupedSplits.add(newSplit);
    }
    return groupedSplits;
  }

  // From here on originalSplits is non-empty and desiredNumSplits is non-zero
  // (the early return above handled the other cases), so the divisions below
  // do not divide by zero.
  long lengthPerGroup = totalLength / desiredNumSplits;
  int numNodeLocations = distinctLocations.size();
  int numSplitsPerLocation = originalSplits.size() / numNodeLocations;
  int numSplitsInGroup = originalSplits.size() / desiredNumSplits;

  // allocation loop here so that we have a good initial size for the lists
  for (String location : distinctLocations.keySet()) {
    distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
  }

  // Register every split under each of its distinct locations; locSet removes
  // duplicate locations reported for the same split.
  Set<String> locSet = new HashSet<String>();
  for (SplitContainer split : originalSplits) {
    locSet.clear();
    String[] locations = locationProvider.getPreferredLocations(split);
    if (locations == null || locations.length == 0) {
      locations = emptyLocations;
    }
    for (String location : locations) {
      if (location == null) {
        location = emptyLocation;
      }
      locSet.add(location);
    }
    for (String location : locSet) {
      LocationHolder holder = distinctLocations.get(location);
      holder.splits.add(split);
    }
  }

  boolean groupByLength = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_LENGTH, TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
  boolean groupByCount = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_COUNT, TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
  boolean nodeLocalOnly = conf.getBoolean(TEZ_GROUPING_NODE_LOCAL_ONLY, TEZ_GROUPING_NODE_LOCAL_ONLY_DEFAULT);
  if (!(groupByLength || groupByCount)) {
    throw new TezUncheckedException("None of the grouping parameters are true: "
        + TEZ_GROUPING_SPLIT_BY_LENGTH + ", " + TEZ_GROUPING_SPLIT_BY_COUNT);
  }
  LOG.info("Desired numSplits: " + desiredNumSplits
      + " lengthPerGroup: " + lengthPerGroup + " numLocations: " + numNodeLocations
      + " numSplitsPerLocation: " + numSplitsPerLocation + " numSplitsInGroup: " + numSplitsInGroup
      + " totalLength: " + totalLength + " numOriginalSplits: " + originalSplits.size()
      + " . Grouping by length: " + groupByLength + " count: " + groupByCount + " nodeLocalOnly: "
      + nodeLocalOnly);

  // go through locations and group splits
  int splitsProcessed = 0;
  List<SplitContainer> group = new ArrayList<SplitContainer>(numSplitsInGroup);
  Set<String> groupLocationSet = new HashSet<String>(10);
  boolean allowSmallGroups = false;
  boolean doingRackLocal = false;
  int iterations = 0;
  // Each iteration makes one pass over all locations, creating at most one
  // group per location, until every split has been assigned to a group.
  while (splitsProcessed < originalSplits.size()) {
    iterations++;
    int numFullGroupsCreated = 0;
    for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
      group.clear();
      groupLocationSet.clear();
      String location = entry.getKey();
      LocationHolder holder = entry.getValue();
      SplitContainer splitContainer = holder.getUnprocessedHeadSplit();
      if (splitContainer == null) {
        // all splits on node processed
        continue;
      }
      // Remember where we started so a too-small group can be rolled back.
      int oldHeadIndex = holder.headIndex;
      long groupLength = 0;
      int groupNumSplits = 0;
      // Always take at least one split, then keep adding while the enabled
      // limits (length and/or count) would not be exceeded by the next one.
      do {
        group.add(splitContainer);
        groupLength += estimator.getEstimatedSize(splitContainer);
        groupNumSplits++;
        holder.incrementHeadIndex();
        splitContainer = holder.getUnprocessedHeadSplit();
      } while (splitContainer != null
          && (!groupByLength || (groupLength + estimator.getEstimatedSize(splitContainer) <= lengthPerGroup))
          && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

      // The location ran out of splits before the group reached half of every
      // enabled target: undo it unless small groups are allowed.
      if (holder.isEmpty() && !allowSmallGroups && (!groupByLength || groupLength < lengthPerGroup / 2)
          && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
        // group too small, reset it
        holder.headIndex = oldHeadIndex;
        continue;
      }

      numFullGroupsCreated++;

      // One split group created
      String[] groupLocation = { location };
      // Reference comparison against the sentinel instance is deliberate here.
      if (location == emptyLocation) {
        groupLocation = null;
      } else if (doingRackLocal) {
        // In rack-local mode `location` is a rack name, so the group's
        // locations are collected from its member splits instead.
        for (SplitContainer splitH : group) {
          String[] locations = locationProvider.getPreferredLocations(splitH);
          if (locations != null) {
            for (String loc : locations) {
              if (loc != null) {
                groupLocationSet.add(loc);
              }
            }
          }
        }
        // NOTE(review): toArray reuses the one-element array when the set fits,
        // so an empty set yields {null} rather than an empty array - confirm
        // that GroupedSplitContainer tolerates a null element.
        groupLocation = groupLocationSet.toArray(groupLocation);
      }
      GroupedSplitContainer groupedSplit = new GroupedSplitContainer(group.size(), wrappedInputFormatName,
          groupLocation,
          // pass rack local hint directly to AM
          ((doingRackLocal && location != emptyLocation) ? location : null));
      for (SplitContainer groupedSplitContainer : group) {
        groupedSplit.addSplit(groupedSplitContainer);
        Preconditions.checkState(groupedSplitContainer.isProcessed() == false,
            "Duplicates in grouping at location: " + location);
        groupedSplitContainer.setIsProcessed(true);
        splitsProcessed++;
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength() + " split at: "
            + location);
      }
      groupedSplits.add(groupedSplit);
    }

    if (!doingRackLocal && numFullGroupsCreated < 1) {
      // no node could create a regular node-local group.

      // Allow small groups if that is configured.
      if (nodeLocalOnly && !allowSmallGroups) {
        LOG.info(
            "Allowing small groups early after attempting to create full groups at iteration: {}, groupsCreatedSoFar={}",
            iterations, groupedSplits.size());
        allowSmallGroups = true;
        continue;
      }

      // else go rack-local
      doingRackLocal = true;
      // re-create locations
      int numRemainingSplits = originalSplits.size() - splitsProcessed;
      Set<SplitContainer> remainingSplits = new HashSet<SplitContainer>(numRemainingSplits);
      // gather remaining splits.
      for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
        LocationHolder locHolder = entry.getValue();
        while (!locHolder.isEmpty()) {
          SplitContainer splitHolder = locHolder.getUnprocessedHeadSplit();
          if (splitHolder != null) {
            remainingSplits.add(splitHolder);
            locHolder.incrementHeadIndex();
          }
        }
      }
      // Sanity check: the set of unprocessed splits must match the counter.
      if (remainingSplits.size() != numRemainingSplits) {
        throw new TezUncheckedException(
            "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
      }

      // doing all this now instead of up front because the number of remaining
      // splits is expected to be much smaller
      RackResolver.init(conf);
      Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
      Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
      for (String location : distinctLocations.keySet()) {
        String rack = emptyLocation;
        if (location != emptyLocation) {
          rack = RackResolver.resolve(location).getNetworkLocation();
        }
        locToRackMap.put(location, rack);
        if (rackLocations.get(rack) == null) {
          // splits will probably be located in all racks
          rackLocations.put(rack, new LocationHolder(numRemainingSplits));
        }
      }
      distinctLocations.clear();
      HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
      int numRackSplitsToGroup = remainingSplits.size();
      for (SplitContainer split : originalSplits) {
        if (numRackSplitsToGroup == 0) {
          break;
        }
        // Iterate through the original splits in their order and consider them for grouping.
        // This maintains the original ordering in the list and thus subsequent grouping will
        // maintain that order
        if (!remainingSplits.contains(split)) {
          continue;
        }
        numRackSplitsToGroup--;
        rackSet.clear();
        String[] locations = locationProvider.getPreferredLocations(split);
        if (locations == null || locations.length == 0) {
          locations = emptyLocations;
        }
        for (String location : locations) {
          if (location == null) {
            location = emptyLocation;
          }
          rackSet.add(locToRackMap.get(location));
        }
        for (String rack : rackSet) {
          rackLocations.get(rack).splits.add(split);
        }
      }

      remainingSplits.clear();
      // From now on the outer loop iterates over racks instead of nodes.
      distinctLocations = rackLocations;
      // adjust split length to be smaller because the data is non local
      float rackSplitReduction = conf.getFloat(TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
          TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
      if (rackSplitReduction > 0) {
        long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
        int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
        // Only apply a reduced target if it did not round down to zero.
        if (newLengthPerGroup > 0) {
          lengthPerGroup = newLengthPerGroup;
        }
        if (newNumSplitsInGroup > 0) {
          numSplitsInGroup = newNumSplitsInGroup;
        }
      }

      LOG.info("Doing rack local after iteration: " + iterations + " splitsProcessed: " + splitsProcessed
          + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: " + groupedSplits.size()
          + " lengthPerGroup: " + lengthPerGroup + " numSplitsInGroup: " + numSplitsInGroup);

      // dont do smallGroups for the first pass
      continue;
    }

    if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
      // a few nodes have a lot of data or data is thinly spread across nodes
      // so allow small groups now
      allowSmallGroups = true;
      LOG.info("Allowing small groups after iteration: " + iterations + " splitsProcessed: "
          + splitsProcessed + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
          + groupedSplits.size());
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Iteration: " + iterations + " splitsProcessed: " + splitsProcessed
          + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: " + groupedSplits.size());
    }
  }
  LOG.info("Number of splits desired: " + desiredNumSplits + " created: " + groupedSplits.size()
      + " splitsProcessed: " + splitsProcessed);
  return groupedSplits;
}
From source file:org.apache.tez.runtime.library.broadcast.input.BroadcastInputManager.java
License:Apache License
public BroadcastInputManager(String uniqueIdentifier, Configuration conf) { this.conf = conf; this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier); this.localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS); // Setup configuration final float maxInMemCopyUse = conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse); }//from www . jav a 2s .c om // Allow unit tests to fix Runtime memory this.memoryLimit = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse); final float singleShuffleMemoryLimitPercent = conf.getFloat( TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT); if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) { throw new IllegalArgumentException( "Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent); } this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent); LOG.info("BroadcastInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: " + this.maxSingleShuffleLimit); }
From source file:org.apache.tez.runtime.library.common.shuffle.impl.MergeManager.java
License:Apache License
public MergeManager(Configuration conf, FileSystem localFS, LocalDirAllocator localDirAllocator, TezInputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter, TezCounter reduceCombineInputCounter, TezCounter mergedMapOutputsCounter, ExceptionReporter exceptionReporter) { this.inputContext = inputContext; this.conf = conf; this.localDirAllocator = localDirAllocator; this.exceptionReporter = exceptionReporter; this.combiner = combiner; this.reduceCombineInputCounter = reduceCombineInputCounter; this.spilledRecordsCounter = spilledRecordsCounter; this.mergedMapOutputsCounter = mergedMapOutputsCounter; this.mapOutputFile = new TezTaskOutputFiles(conf, inputContext.getUniqueIdentifier()); this.localFS = localFS; this.rfs = ((LocalFileSystem) localFS).getRaw(); if (ConfigUtils.isIntermediateInputCompressed(conf)) { Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, conf); } else {//from ww w .j a v a 2 s . 
c o m codec = null; } this.ifileReadAhead = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); if (this.ifileReadAhead) { this.ifileReadAheadLength = conf.getInt(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT); } else { this.ifileReadAheadLength = 0; } this.ifileBufferSize = conf.getInt("io.file.buffer.size", TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT); final float maxInMemCopyUse = conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse); } // Allow unit tests to fix Runtime memory this.memoryLimit = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse); this.ioSortFactor = conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR); final float singleShuffleMemoryLimitPercent = conf.getFloat( TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT); if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) { throw new IllegalArgumentException( "Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent); } this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent); this.memToMemMergeOutputsThreshold = conf.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, ioSortFactor); this.mergeThreshold = (long) (this.memoryLimit * conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT)); LOG.info("MergerManager: memoryLimit=" + 
memoryLimit + ", " + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor=" + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold); if (this.maxSingleShuffleLimit >= this.mergeThreshold) { throw new RuntimeException("Invlaid configuration: " + "maxSingleShuffleLimit should be less than mergeThreshold" + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit + "mergeThreshold: " + this.mergeThreshold); } boolean allowMemToMemMerge = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM); if (allowMemToMemMerge) { this.memToMemMerger = new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold); this.memToMemMerger.start(); } else { this.memToMemMerger = null; } this.inMemoryMerger = new InMemoryMerger(this); this.inMemoryMerger.start(); this.onDiskMerger = new OnDiskMerger(this); this.onDiskMerger.start(); }
From source file:org.apache.tez.runtime.library.common.shuffle.impl.MergeManager.java
License:Apache License
private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, List<MapOutput> inMemoryMapOutputs, List<Path> onDiskMapOutputs) throws IOException { LOG.info("finalMerge called with " + inMemoryMapOutputs.size() + " in-memory map-outputs and " + onDiskMapOutputs.size() + " on-disk map-outputs"); final float maxRedPer = job.getFloat(TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_INPUT_BUFFER_PERCENT); if (maxRedPer > 1.0 || maxRedPer < 0.0) { throw new IOException(TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT + maxRedPer); }// www . j a v a2 s. c o m int maxInMemReduce = (int) Math.min(Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE); LOG.info("Memory allocated for final merge output: " + maxInMemReduce + ", using factor: " + maxRedPer); // merge config params Class keyClass = (Class) ConfigUtils.getIntermediateInputKeyClass(job); Class valueClass = (Class) ConfigUtils.getIntermediateInputValueClass(job); final Path tmpDir = new Path(inputContext.getUniqueIdentifier()); final RawComparator comparator = (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(job); // segments required to vacate memory List<Segment> memDiskSegments = new ArrayList<Segment>(); long inMemToDiskBytes = 0; boolean mergePhaseFinished = false; if (inMemoryMapOutputs.size() > 0) { int srcTaskId = inMemoryMapOutputs.get(0).getAttemptIdentifier().getInputIdentifier().getSrcTaskIndex(); inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, memDiskSegments, maxInMemReduce); final int numMemDiskSegments = memDiskSegments.size(); if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) { // If we reach here, it implies that we have less than io.sort.factor // disk segments and this will be incremented by 1 (result of the // memory segments merge). 
Since this total would still be // <= io.sort.factor, we will not do any more intermediate merges, // the merge of all these disk segments would be directly fed to the // reduce method mergePhaseFinished = true; // must spill to disk, but can't retain in-mem for intermediate merge final Path outputPath = mapOutputFile.getInputFileForWrite(srcTaskId, inMemToDiskBytes) .suffix(Constants.MERGED_OUTPUT_PREFIX); final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass, memDiskSegments, numMemDiskSegments, tmpDir, comparator, nullProgressable, spilledRecordsCounter, null, null); final Writer writer = new Writer(job, fs, outputPath, keyClass, valueClass, codec, null); try { TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS); // add to list of final disk outputs. onDiskMapOutputs.add(outputPath); } catch (IOException e) { if (null != outputPath) { try { fs.delete(outputPath, true); } catch (IOException ie) { // NOTHING } } throw e; } finally { if (null != writer) { writer.close(); } } LOG.info("Merged " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes to disk to satisfy " + "reduce memory limit"); inMemToDiskBytes = 0; memDiskSegments.clear(); } else if (inMemToDiskBytes != 0) { LOG.info("Keeping " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes in memory for " + "intermediate, on-disk merge"); } } // segments on disk List<Segment> diskSegments = new ArrayList<Segment>(); long onDiskBytes = inMemToDiskBytes; Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]); for (Path file : onDisk) { onDiskBytes += fs.getFileStatus(file).getLen(); LOG.debug("Disk file: " + file + " Length is " + fs.getFileStatus(file).getLen()); diskSegments.add(new Segment(job, fs, file, codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, false, (file.toString().endsWith(Constants.MERGED_OUTPUT_PREFIX) ? 
null : mergedMapOutputsCounter))); } LOG.info("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk"); Collections.sort(diskSegments, new Comparator<Segment>() { public int compare(Segment o1, Segment o2) { if (o1.getLength() == o2.getLength()) { return 0; } return o1.getLength() < o2.getLength() ? -1 : 1; } }); // build final list of segments from merged backed by disk + in-mem List<Segment> finalSegments = new ArrayList<Segment>(); long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0); LOG.info("Merging " + finalSegments.size() + " segments, " + inMemBytes + " bytes from memory into reduce"); if (0 != onDiskBytes) { final int numInMemSegments = memDiskSegments.size(); diskSegments.addAll(0, memDiskSegments); memDiskSegments.clear(); TezRawKeyValueIterator diskMerge = TezMerger.merge(job, fs, keyClass, valueClass, diskSegments, ioSortFactor, numInMemSegments, tmpDir, comparator, nullProgressable, false, spilledRecordsCounter, null, null); diskSegments.clear(); if (0 == finalSegments.size()) { return diskMerge; } finalSegments.add(new Segment(new RawKVIteratorReader(diskMerge, onDiskBytes), true)); } return TezMerger.merge(job, fs, keyClass, valueClass, finalSegments, finalSegments.size(), tmpDir, comparator, nullProgressable, spilledRecordsCounter, null, null); }
From source file:org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java
License:Apache License
public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory, long memoryAvailable) { this.conf = conf; this.maxAvailableTaskMemory = maxTaskAvailableMemory; this.initialMemoryAvailable = memoryAvailable; this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier); this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); // Setup configuration final float maxInMemCopyUse = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse); }//from w w w .ja v a 2s. com long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse); if (memReq <= this.initialMemoryAvailable) { this.memoryLimit = memReq; } else { this.memoryLimit = initialMemoryAvailable; } LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit); final float singleShuffleMemoryLimitPercent = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT); if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) { throw new IllegalArgumentException( "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent); } //TODO: cap it to MAX_VALUE until MemoryFetchedInput can handle > 2 GB this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent), Integer.MAX_VALUE); LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: " + this.maxSingleShuffleLimit); }
From source file:org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java
License:Apache License
/**
 * Computes the initial shuffle memory request without constructing an allocator.
 *
 * The task memory is read from {@code Constants.TEZ_RUNTIME_TASK_MEMORY}, falling
 * back to {@code maxAvailableTaskMemory} capped at {@code Integer.MAX_VALUE}, and is
 * then scaled by the configured shuffle fetch buffer percent.
 *
 * @param conf configuration the fetch buffer percent and task memory are read from
 * @param maxAvailableTaskMemory default task memory when the configuration has no override
 * @return the scaled task memory, truncated to a {@code long}
 * @throws IllegalArgumentException if the fetch buffer percent is outside [0, 1]
 */
@Private
public static long getInitialMemoryReq(Configuration conf, long maxAvailableTaskMemory) {
  final String percentKey = TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT;
  final float fetchBufferFraction = conf.getFloat(percentKey,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);

  final boolean outOfRange = fetchBufferFraction < 0.0 || fetchBufferFraction > 1.0;
  if (outOfRange) {
    throw new IllegalArgumentException(
        "Invalid value for " + percentKey + ": " + fetchBufferFraction);
  }

  final long fallbackTaskMemory = Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE);
  final long taskMemory = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, fallbackTaskMemory);
  return (long) (taskMemory * fetchBufferFraction);
}
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java
License:Apache License
/** * Construct the MergeManager. Must call start before it becomes usable. *//* w w w . j a v a 2 s .c om*/ public MergeManager(Configuration conf, FileSystem localFS, LocalDirAllocator localDirAllocator, InputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter, TezCounter reduceCombineInputCounter, TezCounter mergedMapOutputsCounter, ExceptionReporter exceptionReporter, long initialMemoryAvailable, CompressionCodec codec, boolean ifileReadAheadEnabled, int ifileReadAheadLength) { this.inputContext = inputContext; this.conf = conf; this.localDirAllocator = localDirAllocator; this.exceptionReporter = exceptionReporter; this.initialMemoryAvailable = initialMemoryAvailable; this.combiner = combiner; this.reduceCombineInputCounter = reduceCombineInputCounter; this.spilledRecordsCounter = spilledRecordsCounter; this.mergedMapOutputsCounter = mergedMapOutputsCounter; this.mapOutputFile = new TezTaskOutputFiles(conf, inputContext.getUniqueIdentifier()); this.localFS = localFS; this.rfs = ((LocalFileSystem) localFS).getRaw(); this.numDiskToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_DISK_TO_DISK_MERGES); this.numMemToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_MEM_TO_DISK_MERGES); this.additionalBytesWritten = inputContext.getCounters() .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN); this.additionalBytesRead = inputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ); this.codec = codec; this.ifileReadAhead = ifileReadAheadEnabled; if (this.ifileReadAhead) { this.ifileReadAheadLength = ifileReadAheadLength; } else { this.ifileReadAheadLength = 0; } this.ifileBufferSize = conf.getInt("io.file.buffer.size", TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT); // Figure out initial memory req start final float maxInMemCopyUse = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 
TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse); } // Allow unit tests to fix Runtime memory long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, (long) (inputContext.getTotalMemoryAvailableToTask() * maxInMemCopyUse)); float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT); if (maxRedPer > 1.0 || maxRedPer < 0.0) { throw new TezUncheckedException( TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + maxRedPer); } long maxRedBuffer = (long) (inputContext.getTotalMemoryAvailableToTask() * maxRedPer); // Figure out initial memory req end if (this.initialMemoryAvailable < memLimit) { this.memoryLimit = this.initialMemoryAvailable; } else { this.memoryLimit = memLimit; } if (this.initialMemoryAvailable < maxRedBuffer) { this.postMergeMemLimit = this.initialMemoryAvailable; } else { this.postMergeMemLimit = maxRedBuffer; } LOG.info("InitialRequest: ShuffleMem=" + memLimit + ", postMergeMem=" + maxRedBuffer + ", RuntimeTotalAvailable=" + this.initialMemoryAvailable + ". 
Updated to: ShuffleMem=" + this.memoryLimit + ", postMergeMem: " + this.postMergeMemLimit); this.ioSortFactor = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT); final float singleShuffleMemoryLimitPercent = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT); if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) { throw new IllegalArgumentException( "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent); } //TODO: Cap it to MAX_VALUE until MapOutput starts supporting > 2 GB this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent), Integer.MAX_VALUE); this.memToMemMergeOutputsThreshold = conf .getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, ioSortFactor); this.mergeThreshold = (long) (this.memoryLimit * conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT_DEFAULT)); LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor=" + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold); if (this.maxSingleShuffleLimit >= this.mergeThreshold) { throw new RuntimeException("Invlaid configuration: " + "maxSingleShuffleLimit should be less than mergeThreshold" + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit + ", mergeThreshold: " + this.mergeThreshold); } boolean allowMemToMemMerge = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM_DEFAULT); if (allowMemToMemMerge) { this.memToMemMerger = new 
IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold); } else { this.memToMemMerger = null; } this.inMemoryMerger = new InMemoryMerger(this); this.onDiskMerger = new OnDiskMerger(this); }
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java
License:Apache License
/** * Exposing this to get an initial memory ask without instantiating the object. */// w w w . ja v a 2s.c o m @Private static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) { final float maxInMemCopyUse = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse); } // Allow unit tests to fix Runtime memory long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, (long) (maxAvailableTaskMemory * maxInMemCopyUse)); LOG.info("Initial Shuffle Memory Required: " + memLimit + ", based on INPUT_BUFFER_factor: " + maxInMemCopyUse); float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT); if (maxRedPer > 1.0 || maxRedPer < 0.0) { throw new TezUncheckedException( TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + maxRedPer); } long maxRedBuffer = (long) (maxAvailableTaskMemory * maxRedPer); LOG.info("Initial Memory required for final merged output: " + maxRedBuffer + ", using factor: " + maxRedPer); long reqMem = Math.max(maxRedBuffer, memLimit); return reqMem; }