Example usage for org.apache.hadoop.conf Configuration getLong

List of usage examples for org.apache.hadoop.conf Configuration getLong

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration getLong.

Prototype

public long getLong(String name, long defaultValue) 

Source Link

Document

Get the value of the name property as a long; if no such property exists, the provided defaultValue is returned.

Usage
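
Before the source-file examples, here is a minimal, self-contained sketch of the call pattern. The property names (my.app.timeout.ms, my.app.unset.property) and the default values are hypothetical, chosen only for illustration.

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Hypothetical property, set here only so the first lookup below finds a value.
        conf.setLong("my.app.timeout.ms", 45000L);

        // Returns the property value as a long; falls back to the
        // supplied default (30000L) when the property is not set.
        long timeout = conf.getLong("my.app.timeout.ms", 30000L);
        long missing = conf.getLong("my.app.unset.property", 30000L);

        System.out.println("timeout=" + timeout); // 45000
        System.out.println("missing=" + missing); // 30000
    }
}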

From source file: org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService.java

License: Apache License

@Override
public void serviceInit(Configuration conf) throws Exception {
    LOG.info("Initializing ATSService");
    timelineClient = TimelineClient.createTimelineClient();
    timelineClient.init(conf);
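    // Flush timeout on shutdown, read via getLong; a negative value means wait forever (see the check below).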
    maxTimeToWaitOnShutdown = conf.getLong(TezConfiguration.YARN_ATS_EVENT_FLUSH_TIMEOUT_MILLIS,
            TezConfiguration.YARN_ATS_EVENT_FLUSH_TIMEOUT_MILLIS_DEFAULT);
    maxEventsPerBatch = conf.getInt(TezConfiguration.YARN_ATS_MAX_EVENTS_PER_BATCH,
            TezConfiguration.YARN_ATS_MAX_EVENTS_PER_BATCH_DEFAULT);
    maxPollingTimeMillis = conf.getInt(TezConfiguration.YARN_ATS_MAX_POLLING_TIME_PER_EVENT,
            TezConfiguration.YARN_ATS_MAX_POLLING_TIME_PER_EVENT_DEFAULT);
    if (maxTimeToWaitOnShutdown < 0) {
        waitForeverOnShutdown = true;
    }
    sessionDomainId = conf.get(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID);

    LOG.info("Using " + atsHistoryACLManagerClassName + " to manage Timeline ACLs");
    try {
        historyACLPolicyManager = ReflectionUtils.createClazzInstance(atsHistoryACLManagerClassName);
        historyACLPolicyManager.setConf(conf);
    } catch (TezUncheckedException e) {
        LOG.warn("Could not instantiate object for " + atsHistoryACLManagerClassName
                + ". ACLs cannot be enforced correctly for history data in Timeline", e);
        if (!conf.getBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS,
                TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS_DEFAULT)) {
            throw e;
        }
        historyACLPolicyManager = null;
    }

}

From source file: org.apache.tez.dag.history.logging.ats.ATSV15HistoryLoggingService.java

License: Apache License

@Override
public void serviceInit(Configuration serviceConf) throws Exception {
    Configuration conf = new Configuration(serviceConf);

    String summaryEntityTypesStr = EntityTypes.TEZ_APPLICATION + "," + EntityTypes.TEZ_APPLICATION_ATTEMPT + ","
            + EntityTypes.TEZ_DAG_ID;

    // Ensure that summary entity types are defined properly for Tez.
    if (conf.getBoolean(TezConfiguration.TEZ_AM_ATS_V15_OVERRIDE_SUMMARY_TYPES,
            TezConfiguration.TEZ_AM_ATS_V15_OVERRIDE_SUMMARY_TYPES_DEFAULT)) {
        conf.set(YarnConfiguration.TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_SUMMARY_ENTITY_TYPES,
                summaryEntityTypesStr);
    }

    historyLoggingEnabled = conf.getBoolean(TezConfiguration.TEZ_AM_HISTORY_LOGGING_ENABLED,
            TezConfiguration.TEZ_AM_HISTORY_LOGGING_ENABLED_DEFAULT);
    if (!historyLoggingEnabled) {
        LOG.info("ATSService: History Logging disabled. " + TezConfiguration.TEZ_AM_HISTORY_LOGGING_ENABLED
                + " set to false");
        return;
    }

    if (conf.getBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED,
            YarnConfiguration.DEFAULT_TIMELINE_SERVICE_ENABLED)) {
        timelineClient = TimelineClient.createTimelineClient();
        timelineClient.init(conf);
    } else {
        this.timelineClient = null;
        if (conf.get(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS, "")
                .equals(atsHistoryLoggingServiceClassName)) {
            LOG.warn(atsHistoryLoggingServiceClassName + " is disabled due to Timeline Service being disabled, "
                    + YarnConfiguration.TIMELINE_SERVICE_ENABLED + " set to false");
        }
    }
    maxTimeToWaitOnShutdown = conf.getLong(TezConfiguration.YARN_ATS_EVENT_FLUSH_TIMEOUT_MILLIS,
            TezConfiguration.YARN_ATS_EVENT_FLUSH_TIMEOUT_MILLIS_DEFAULT);
    maxPollingTimeMillis = conf.getInt(TezConfiguration.YARN_ATS_MAX_POLLING_TIME_PER_EVENT,
            TezConfiguration.YARN_ATS_MAX_POLLING_TIME_PER_EVENT_DEFAULT);
    if (maxTimeToWaitOnShutdown < 0) {
        waitForeverOnShutdown = true;
    }
    sessionDomainId = conf.get(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID);

    LOG.info("Initializing " + ATSV15HistoryLoggingService.class.getSimpleName() + " with "
            + ", maxPollingTime(ms)=" + maxPollingTimeMillis + ", waitTimeForShutdown(ms)="
            + maxTimeToWaitOnShutdown + ", TimelineACLManagerClass=" + atsHistoryACLManagerClassName);

    try {
        historyACLPolicyManager = ReflectionUtils.createClazzInstance(atsHistoryACLManagerClassName);
        historyACLPolicyManager.setConf(conf);
    } catch (TezReflectionException e) {
        LOG.warn("Could not instantiate object for " + atsHistoryACLManagerClassName
                + ". ACLs cannot be enforced correctly for history data in Timeline", e);
        if (!conf.getBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS,
                TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS_DEFAULT)) {
            throw e;
        }
        historyACLPolicyManager = null;
    }

}

From source file: org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.java

License: Apache License

@Override
public void initialize() {
    Configuration conf;
    try {
        conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    } catch (IOException e) {
        throw new TezUncheckedException(e);
    }

    this.slowStartMinSrcCompletionFraction = conf.getFloat(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION,
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT);
    this.slowStartMaxSrcCompletionFraction = conf.getFloat(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION,
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT);

    if (slowStartMinSrcCompletionFraction < 0
            || slowStartMaxSrcCompletionFraction < slowStartMinSrcCompletionFraction) {
        throw new IllegalArgumentException("Invalid values for slowStartMinSrcCompletionFraction"
                + "/slowStartMaxSrcCompletionFraction. Min cannot be < 0 and " + "max cannot be < min.");
    }

    enableAutoParallelism = conf.getBoolean(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL_DEFAULT);
    desiredTaskInputDataSize = conf.getLong(
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT);
    minTaskParallelism = Math.max(1,
            conf.getInt(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
                    ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM_DEFAULT));
    LOG.info("Shuffle Vertex Manager: settings" + " minFrac:" + slowStartMinSrcCompletionFraction + " maxFrac:"
            + slowStartMaxSrcCompletionFraction + " auto:" + enableAutoParallelism + " desiredTaskInput:"
            + desiredTaskInputDataSize + " minTasks:" + minTaskParallelism);

    Map<String, EdgeProperty> inputs = getContext().getInputVertexEdgeProperties();
    for (Map.Entry<String, EdgeProperty> entry : inputs.entrySet()) {
        srcVertexInfo.put(entry.getKey(), new SourceVertexInfo(entry.getValue()));
        getContext().registerForVertexStateUpdates(entry.getKey(), EnumSet.of(VertexState.CONFIGURED));
        if (entry.getValue().getDataMovementType() == DataMovementType.SCATTER_GATHER) {
            bipartiteSources++;
        }
    }
    if (bipartiteSources == 0) {
        throw new TezUncheckedException("Atleast 1 bipartite source should exist");
    }

    if (enableAutoParallelism) {
        getContext().vertexReconfigurationPlanned();
    }
    // don't track the source tasks here since those tasks may themselves be
    // dynamically changed as the DAG progresses.

}

From source file: org.apache.tez.engine.common.shuffle.impl.MergeManager.java

License: Apache License

public MergeManager(TezTaskAttemptID taskAttemptId, Configuration conf, FileSystem localFS,
        LocalDirAllocator localDirAllocator, TezTaskReporter reporter, Processor combineProcessor,
        TezCounter spilledRecordsCounter, TezCounter reduceCombineInputCounter,
        TezCounter mergedMapOutputsCounter, ExceptionReporter exceptionReporter, Progress mergePhase) {
    this.taskAttemptId = taskAttemptId;
    this.conf = conf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;

    this.reporter = reporter;
    this.combineProcessor = combineProcessor;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = new TezTaskOutputFiles();
    this.mapOutputFile.setConf(conf);

    this.localFS = localFS;
    this.rfs = ((LocalFileSystem) localFS).getRaw();

    if (ConfigUtils.isIntermediateInputCompressed(conf)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
    } else {
        codec = null;
    }

    final float maxInMemCopyUse = conf.getFloat(TezJobConfig.TEZ_ENGINE_SHUFFLE_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_INPUT_BUFFER_PERCENT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezJobConfig.TEZ_ENGINE_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
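    // getLong supplies either an explicit task memory budget in bytes or the JVM max heap
    // (capped at Integer.MAX_VALUE); the result is then scaled by the in-memory buffer percent.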
    this.memoryLimit = (long) (conf.getLong(Constants.TEZ_ENGINE_TASK_MEMORY,
            Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse);

    this.ioSortFactor = conf.getInt(TezJobConfig.TEZ_ENGINE_IO_SORT_FACTOR,
            TezJobConfig.DEFAULT_TEZ_ENGINE_IO_SORT_FACTOR);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezJobConfig.TEZ_ENGINE_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezJobConfig.TEZ_ENGINE_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold = conf.getInt(TezJobConfig.TEZ_ENGINE_SHUFFLE_MEMTOMEM_SEGMENTS,
            ioSortFactor);
    this.mergeThreshold = (long) (this.memoryLimit
            * conf.getFloat(TezJobConfig.TEZ_ENGINE_SHUFFLE_MERGE_PERCENT,
                    TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_MERGE_PERCENT));
    LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
            + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
            + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
        throw new RuntimeException("Invlaid configuration: "
                + "maxSingleShuffleLimit should be less than mergeThreshold" + "maxSingleShuffleLimit: "
                + this.maxSingleShuffleLimit + "mergeThreshold: " + this.mergeThreshold);
    }

    boolean allowMemToMemMerge = conf.getBoolean(TezJobConfig.TEZ_ENGINE_SHUFFLE_ENABLE_MEMTOMEM,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_ENABLE_MEMTOMEM);
    if (allowMemToMemMerge) {
        this.memToMemMerger = new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.start();
    } else {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = new InMemoryMerger(this);
    this.inMemoryMerger.start();

    this.onDiskMerger = new OnDiskMerger(this);
    this.onDiskMerger.start();

    this.mergePhase = mergePhase;
}

From source file: org.apache.tez.engine.common.sort.impl.TezMerger.java

License: Apache License

public static <K extends Object, V extends Object> void writeFile(TezRawKeyValueIterator records, Writer writer,
        Progressable progressable, Configuration conf) throws IOException {
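    // RECORDS_BEFORE_PROGRESS, read via getLong, sets how many records are written between progress() calls.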
    long progressBar = conf.getLong(TezJobConfig.RECORDS_BEFORE_PROGRESS,
            TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
    long recordCtr = 0;
    while (records.next()) {
        writer.append(records.getKey(), records.getValue());

        if (((recordCtr++) % progressBar) == 0) {
            progressable.progress();
        }
    }
}

From source file: org.apache.tez.mapreduce.grouper.TezSplitGrouper.java

License: Apache License

public List<GroupedSplitContainer> getGroupedSplits(Configuration conf, List<SplitContainer> originalSplits,
        int desiredNumSplits, String wrappedInputFormatName, SplitSizeEstimatorWrapper estimator,
        SplitLocationProviderWrapper locationProvider) throws IOException, InterruptedException {
    LOG.info("Grouping splits in Tez");
    Preconditions.checkArgument(originalSplits != null, "Splits must be specified");

    int configNumSplits = conf.getInt(TEZ_GROUPING_SPLIT_COUNT, 0);
    if (configNumSplits > 0) {
        // always use config override if specified
        desiredNumSplits = configNumSplits;
        LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
    }

    if (estimator == null) {
        estimator = DEFAULT_SPLIT_ESTIMATOR;
    }
    if (locationProvider == null) {
        locationProvider = DEFAULT_SPLIT_LOCATION_PROVIDER;
    }

    List<GroupedSplitContainer> groupedSplits = null;
    String emptyLocation = "EmptyLocation";
    String localhost = "localhost";
    String[] emptyLocations = { emptyLocation };
    groupedSplits = new ArrayList<GroupedSplitContainer>(desiredNumSplits);

    boolean allSplitsHaveLocalhost = true;

    long totalLength = 0;
    Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
    // go through splits and add them to locations
    for (SplitContainer split : originalSplits) {
        totalLength += estimator.getEstimatedSize(split);
        String[] locations = locationProvider.getPreferredLocations(split);
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
            allSplitsHaveLocalhost = false;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
                allSplitsHaveLocalhost = false;
            }
            if (!location.equalsIgnoreCase(localhost)) {
                allSplitsHaveLocalhost = false;
            }
            distinctLocations.put(location, null);
        }
    }

    if (!(configNumSplits > 0 || originalSplits.size() == 0)) {
        // numSplits has not been overridden by config
        // numSplits has been set at runtime
        // there are splits generated
        // desired splits is less than number of splits generated
        // Do sanity checks

        int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.size();
        long lengthPerGroup = totalLength / splitCount;

        long maxLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MAX_SIZE, TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
        long minLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MIN_SIZE, TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
        if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
            throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
                    + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
        }
        if (lengthPerGroup > maxLengthPerGroup) {
            // splits too big to work. Need to override with max size.
            int newDesiredNumSplits = (int) (totalLength / maxLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too small. " + " Desired splitLength: "
                    + lengthPerGroup + " Max splitLength: " + maxLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                    + originalSplits.size());

            desiredNumSplits = newDesiredNumSplits;
        } else if (lengthPerGroup < minLengthPerGroup) {
            // splits too small to work. Need to override with size.
            int newDesiredNumSplits = (int) (totalLength / minLengthPerGroup) + 1;
            /**
             * This is a workaround for systems like S3 that pass the same
             * fake hostname for all splits.
             */
            if (!allSplitsHaveLocalhost) {
                desiredNumSplits = newDesiredNumSplits;
            }

            LOG.info("Desired splits: " + desiredNumSplits + " too large. " + " Desired splitLength: "
                    + lengthPerGroup + " Min splitLength: " + minLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Final desired splits: " + desiredNumSplits
                    + " All splits have localhost: " + allSplitsHaveLocalhost + " Total length: " + totalLength
                    + " Original splits: " + originalSplits.size());
        }
    }

    if (desiredNumSplits == 0 || originalSplits.size() == 0 || desiredNumSplits >= originalSplits.size()) {
        // nothing set. so return all the splits as is
        LOG.info("Using original number of splits: " + originalSplits.size() + " desired splits: "
                + desiredNumSplits);
        groupedSplits = new ArrayList<GroupedSplitContainer>(originalSplits.size());
        for (SplitContainer split : originalSplits) {
            GroupedSplitContainer newSplit = new GroupedSplitContainer(1, wrappedInputFormatName,
                    cleanupLocations(locationProvider.getPreferredLocations(split)), null);
            newSplit.addSplit(split);
            groupedSplits.add(newSplit);
        }
        return groupedSplits;
    }

    long lengthPerGroup = totalLength / desiredNumSplits;
    int numNodeLocations = distinctLocations.size();
    int numSplitsPerLocation = originalSplits.size() / numNodeLocations;
    int numSplitsInGroup = originalSplits.size() / desiredNumSplits;

    // allocation loop here so that we have a good initial size for the lists
    for (String location : distinctLocations.keySet()) {
        distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
    }

    Set<String> locSet = new HashSet<String>();
    for (SplitContainer split : originalSplits) {
        locSet.clear();
        String[] locations = locationProvider.getPreferredLocations(split);
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            locSet.add(location);
        }
        for (String location : locSet) {
            LocationHolder holder = distinctLocations.get(location);
            holder.splits.add(split);
        }
    }

    boolean groupByLength = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_LENGTH, TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
    boolean groupByCount = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_COUNT, TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
    boolean nodeLocalOnly = conf.getBoolean(TEZ_GROUPING_NODE_LOCAL_ONLY, TEZ_GROUPING_NODE_LOCAL_ONLY_DEFAULT);
    if (!(groupByLength || groupByCount)) {
        throw new TezUncheckedException("None of the grouping parameters are true: "
                + TEZ_GROUPING_SPLIT_BY_LENGTH + ", " + TEZ_GROUPING_SPLIT_BY_COUNT);
    }
    LOG.info("Desired numSplits: " + desiredNumSplits + " lengthPerGroup: " + lengthPerGroup + " numLocations: "
            + numNodeLocations + " numSplitsPerLocation: " + numSplitsPerLocation + " numSplitsInGroup: "
            + numSplitsInGroup + " totalLength: " + totalLength + " numOriginalSplits: " + originalSplits.size()
            + " . Grouping by length: " + groupByLength + " count: " + groupByCount + " nodeLocalOnly: "
            + nodeLocalOnly);

    // go through locations and group splits
    int splitsProcessed = 0;
    List<SplitContainer> group = new ArrayList<SplitContainer>(numSplitsInGroup);
    Set<String> groupLocationSet = new HashSet<String>(10);
    boolean allowSmallGroups = false;
    boolean doingRackLocal = false;
    int iterations = 0;
    while (splitsProcessed < originalSplits.size()) {
        iterations++;
        int numFullGroupsCreated = 0;
        for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
            group.clear();
            groupLocationSet.clear();
            String location = entry.getKey();
            LocationHolder holder = entry.getValue();
            SplitContainer splitContainer = holder.getUnprocessedHeadSplit();
            if (splitContainer == null) {
                // all splits on node processed
                continue;
            }
            int oldHeadIndex = holder.headIndex;
            long groupLength = 0;
            int groupNumSplits = 0;
            do {
                group.add(splitContainer);
                groupLength += estimator.getEstimatedSize(splitContainer);
                groupNumSplits++;
                holder.incrementHeadIndex();
                splitContainer = holder.getUnprocessedHeadSplit();
            } while (splitContainer != null
                    && (!groupByLength
                            || (groupLength + estimator.getEstimatedSize(splitContainer) <= lengthPerGroup))
                    && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

            if (holder.isEmpty() && !allowSmallGroups && (!groupByLength || groupLength < lengthPerGroup / 2)
                    && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
                // group too small, reset it
                holder.headIndex = oldHeadIndex;
                continue;
            }

            numFullGroupsCreated++;

            // One split group created
            String[] groupLocation = { location };
            if (location == emptyLocation) {
                groupLocation = null;
            } else if (doingRackLocal) {
                for (SplitContainer splitH : group) {
                    String[] locations = locationProvider.getPreferredLocations(splitH);
                    if (locations != null) {
                        for (String loc : locations) {
                            if (loc != null) {
                                groupLocationSet.add(loc);
                            }
                        }
                    }
                }
                groupLocation = groupLocationSet.toArray(groupLocation);
            }
            GroupedSplitContainer groupedSplit = new GroupedSplitContainer(group.size(), wrappedInputFormatName,
                    groupLocation,
                    // pass rack local hint directly to AM
                    ((doingRackLocal && location != emptyLocation) ? location : null));
            for (SplitContainer groupedSplitContainer : group) {
                groupedSplit.addSplit(groupedSplitContainer);
                Preconditions.checkState(groupedSplitContainer.isProcessed() == false,
                        "Duplicates in grouping at location: " + location);
                groupedSplitContainer.setIsProcessed(true);
                splitsProcessed++;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength() + " split at: "
                        + location);
            }
            groupedSplits.add(groupedSplit);
        }

        if (!doingRackLocal && numFullGroupsCreated < 1) {
            // no node could create a regular node-local group.

            // Allow small groups if that is configured.
            if (nodeLocalOnly && !allowSmallGroups) {
                LOG.info(
                        "Allowing small groups early after attempting to create full groups at iteration: {}, groupsCreatedSoFar={}",
                        iterations, groupedSplits.size());
                allowSmallGroups = true;
                continue;
            }

            // else go rack-local
            doingRackLocal = true;
            // re-create locations
            int numRemainingSplits = originalSplits.size() - splitsProcessed;
            Set<SplitContainer> remainingSplits = new HashSet<SplitContainer>(numRemainingSplits);
            // gather remaining splits.
            for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                LocationHolder locHolder = entry.getValue();
                while (!locHolder.isEmpty()) {
                    SplitContainer splitHolder = locHolder.getUnprocessedHeadSplit();
                    if (splitHolder != null) {
                        remainingSplits.add(splitHolder);
                        locHolder.incrementHeadIndex();
                    }
                }
            }
            if (remainingSplits.size() != numRemainingSplits) {
                throw new TezUncheckedException(
                        "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
            }

            // doing all this now instead of up front because the number of remaining
            // splits is expected to be much smaller
            RackResolver.init(conf);
            Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
            Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
            for (String location : distinctLocations.keySet()) {
                String rack = emptyLocation;
                if (location != emptyLocation) {
                    rack = RackResolver.resolve(location).getNetworkLocation();
                }
                locToRackMap.put(location, rack);
                if (rackLocations.get(rack) == null) {
                    // splits will probably be located in all racks
                    rackLocations.put(rack, new LocationHolder(numRemainingSplits));
                }
            }
            distinctLocations.clear();
            HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
            int numRackSplitsToGroup = remainingSplits.size();
            for (SplitContainer split : originalSplits) {
                if (numRackSplitsToGroup == 0) {
                    break;
                }
                // Iterate through the original splits in their order and consider them for grouping.
                // This maintains the original ordering in the list and thus subsequent grouping will
                // maintain that order
                if (!remainingSplits.contains(split)) {
                    continue;
                }
                numRackSplitsToGroup--;
                rackSet.clear();
                String[] locations = locationProvider.getPreferredLocations(split);
                if (locations == null || locations.length == 0) {
                    locations = emptyLocations;
                }
                for (String location : locations) {
                    if (location == null) {
                        location = emptyLocation;
                    }
                    rackSet.add(locToRackMap.get(location));
                }
                for (String rack : rackSet) {
                    rackLocations.get(rack).splits.add(split);
                }
            }

            remainingSplits.clear();
            distinctLocations = rackLocations;
            // adjust split length to be smaller because the data is non local
            float rackSplitReduction = conf.getFloat(TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
                    TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
            if (rackSplitReduction > 0) {
                long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
                int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
                if (newLengthPerGroup > 0) {
                    lengthPerGroup = newLengthPerGroup;
                }
                if (newNumSplitsInGroup > 0) {
                    numSplitsInGroup = newNumSplitsInGroup;
                }
            }

            LOG.info("Doing rack local after iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: " + groupedSplits.size()
                    + " lengthPerGroup: " + lengthPerGroup + " numSplitsInGroup: " + numSplitsInGroup);

            // don't allow small groups for the first rack-local pass
            continue;
        }

        if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
            // a few nodes have a lot of data or data is thinly spread across nodes
            // so allow small groups now
            allowSmallGroups = true;
            LOG.info("Allowing small groups after iteration: " + iterations + " splitsProcessed: "
                    + splitsProcessed + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplits.size());
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplits.size());
        }
    }
    LOG.info("Number of splits desired: " + desiredNumSplits + " created: " + groupedSplits.size()
            + " splitsProcessed: " + splitsProcessed);
    return groupedSplits;
}

From source file: org.apache.tez.runtime.library.broadcast.input.BroadcastInputManager.java

License: Apache License

public BroadcastInputManager(String uniqueIdentifier, Configuration conf) {
    this.conf = conf;

    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS);

    // Setup configuration
    final float maxInMemCopyUse = conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);

    LOG.info("BroadcastInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
            + this.maxSingleShuffleLimit);
}

From source file: org.apache.tez.runtime.library.common.shuffle.impl.MergeManager.java

License: Apache License

public MergeManager(Configuration conf, FileSystem localFS, LocalDirAllocator localDirAllocator,
        TezInputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter,
        TezCounter reduceCombineInputCounter, TezCounter mergedMapOutputsCounter,
        ExceptionReporter exceptionReporter) {
    this.inputContext = inputContext;
    this.conf = conf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;

    this.combiner = combiner;

    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = new TezTaskOutputFiles(conf, inputContext.getUniqueIdentifier());

    this.localFS = localFS;
    this.rfs = ((LocalFileSystem) localFS).getRaw();

    if (ConfigUtils.isIntermediateInputCompressed(conf)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
    } else {
        codec = null;
    }
    this.ifileReadAhead = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
            TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
    if (this.ifileReadAhead) {
        this.ifileReadAheadLength = conf.getInt(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
                TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
    } else {
        this.ifileReadAheadLength = 0;
    }
    this.ifileBufferSize = conf.getInt("io.file.buffer.size",
            TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

    final float maxInMemCopyUse = conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse);

    this.ioSortFactor = conf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_IO_SORT_FACTOR);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold = conf.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS,
            ioSortFactor);
    this.mergeThreshold = (long) (this.memoryLimit
            * conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT,
                    TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT));
    LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
            + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
            + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
        throw new RuntimeException("Invlaid configuration: "
                + "maxSingleShuffleLimit should be less than mergeThreshold" + "maxSingleShuffleLimit: "
                + this.maxSingleShuffleLimit + "mergeThreshold: " + this.mergeThreshold);
    }

    boolean allowMemToMemMerge = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM);
    if (allowMemToMemMerge) {
        this.memToMemMerger = new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.start();
    } else {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = new InMemoryMerger(this);
    this.inMemoryMerger.start();

    this.onDiskMerger = new OnDiskMerger(this);
    this.onDiskMerger.start();
}

From source file: org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java

License: Apache License

public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory,
        long memoryAvailable) {
    this.conf = conf;
    this.maxAvailableTaskMemory = maxTaskAvailableMemory;
    this.initialMemoryAvailable = memoryAvailable;

    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Setup configuration
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse);

    if (memReq <= this.initialMemoryAvailable) {
        this.memoryLimit = memReq;
    } else {
        this.memoryLimit = initialMemoryAvailable;
    }

    LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    //TODO: cap it to MAX_VALUE until MemoryFetchedInput can handle > 2 GB
    this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent),
            Integer.MAX_VALUE);

    LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
            + this.maxSingleShuffleLimit);
}

From source file: org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java

License: Apache License

@Private
public static long getInitialMemoryReq(Configuration conf, long maxAvailableTaskMemory) {
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }
    long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse);
    return memReq;
}