org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceSpillProcessor.java Source code

Introduction

Here is the source code for org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceSpillProcessor.java.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.eagle.jpm.analyzer.mr.suggestion;

import org.apache.commons.io.FileUtils;
import org.apache.eagle.jpm.analyzer.Processor;
import org.apache.eagle.jpm.analyzer.meta.model.MapReduceAnalyzerEntity;
import org.apache.eagle.jpm.analyzer.publisher.Result;
import org.apache.eagle.jpm.util.jobcounter.JobCounters;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;

import static org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceJobSuggestionContext.MAX_HEAP_PATTERN;
import static org.apache.hadoop.mapreduce.MRJobConfig.IO_SORT_MB;
import static org.apache.hadoop.mapreduce.MRJobConfig.MAP_JAVA_OPTS;
import static org.apache.hadoop.mapreduce.MRJobConfig.MAP_SORT_SPILL_PERCENT;

/**
 * Checks whether records were spilled more than once. If so, finds the minimum
 * amount of sort-buffer memory needed to hold all of the map output data and,
 * based on that value, determines how much heap the map tasks need.
 */
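// Worked example (illustrative numbers, not taken from any real job): with a
// minimum spill memory of 300 MB and mapreduce.map.sort.spill.percent = 0.8,
// the buffer grows to 300 / 0.8 = 375 MB, which rounds up to io.sort.mb = 380;
// a configured map heap below 3 * 380 = 1140 MB then yields a suggested
// -Xmx of (1140 + 1024) / 1024 * 1024 = 2048 MB.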
public class MapReduceSpillProcessor implements Processor<MapReduceAnalyzerEntity> {

    private MapReduceJobSuggestionContext context;

    public MapReduceSpillProcessor(MapReduceJobSuggestionContext context) {
        this.context = context;
    }

    @Override
    public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
        StringBuilder sb = new StringBuilder();
        List<String> optSettings = new ArrayList<>();
        String setting;

        long outputRecords = 0L; // Map output records
        long spillRecords = 0L;  // Spilled records
        try {
            outputRecords = context.getJob().getMapCounters()
                    .getCounterValue(JobCounters.CounterName.MAP_OUTPUT_RECORDS);
            spillRecords = context.getJob().getMapCounters()
                    .getCounterValue(JobCounters.CounterName.SPILLED_RECORDS);

            // More spilled records than map output records means at least some
            // records were written to disk more than once.
            if (outputRecords < spillRecords) {
                sb.append("Total map output records: ").append(outputRecords);
                sb.append(" Total map spilled records: ").append(spillRecords).append(". Please set");

                long minMapSpillMemBytes = context.getMinMapSpillMemBytes();
                double spillPercent = context.getJobconf().getDouble(MAP_SORT_SPILL_PERCENT, 0.8);
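                // Treat a 512 MB sort buffer as the practical ceiling: if even
                // that cannot hold the data at the current spill threshold,
                // suggest spilling only when the buffer is completely full;
                // otherwise size the buffer so the data fits below the threshold.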
                if (minMapSpillMemBytes > 512 * FileUtils.ONE_MB * spillPercent) {
                    if (Math.abs(1.0 - spillPercent) > 0.001) {
                        setting = String.format("-D%s=1", MAP_SORT_SPILL_PERCENT);
                        sb.append(" ").append(setting);
                        optSettings.add(setting);
                    }
                } else {
                    minMapSpillMemBytes /= spillPercent;
                }

                // Round the required buffer up to a multiple of 10 MB.
                long minMapSpillMemMB = (minMapSpillMemBytes / FileUtils.ONE_MB + 10) / 10 * 10;
                // io.sort.mb is capped below 2 GB in Hadoop, so beyond this
                // point the only remedy is smaller, splittable input files.
                if (minMapSpillMemMB >= 2047) {
                    sb.append(
                            "\nPlease reduce the block size of the input files and make sure they are splittable.");
                } else {
                    setting = String.format("-D%s=%s", IO_SORT_MB, minMapSpillMemMB);
                    sb.append(" ").append(setting);
                    optSettings.add(setting);
                    long heapSize = getMaxHeapSize(context.getJobconf().get(MAP_JAVA_OPTS));
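                    // Heuristic: the map-task heap should be at least 3x the
                    // sort buffer; if it is smaller, suggest an -Xmx rounded
                    // up to a whole number of GB.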
                    if (heapSize < 3 * minMapSpillMemMB) {
                        long expectedHeapSizeMB = (minMapSpillMemMB * 3 + 1024) / 1024 * 1024;
                        setting = String.format("-D%s=-Xmx%sM", MAP_JAVA_OPTS, expectedHeapSizeMB);
                        sb.append(" ").append(setting);
                        optSettings.add(setting);
                    }
                }
                sb.append(" to avoid spilled records.\n");
            }

            long reduceInputRecords = context.getJob().getReduceCounters()
                    .getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS);
            spillRecords = context.getJob().getReduceCounters()
                    .getCounterValue(JobCounters.CounterName.SPILLED_RECORDS);
            if (reduceInputRecords < spillRecords) {
                sb.append("Please add more memory (mapreduce.reduce.java.opts) to avoid spilled records.");
                sb.append(" Total Reduce input records: ").append(reduceInputRecords);
                sb.append(" Total Spilled Records: ").append(spillRecords);
                sb.append("\n");
            }

            if (sb.length() > 0) {
                return new Result.ProcessorResult(Result.RuleType.SPILL, Result.ResultLevel.INFO, sb.toString(),
                        optSettings);
            }
        } catch (NullPointerException e) {
            // Counters may be missing when the job failed, so just ignore it.
        }
        return null;
    }

    /**
     * Extracts the -Xmx value from a JVM options string and normalizes it to
     * megabytes; a missing unit is treated as MB, and 0 is returned when no
     * max heap setting is found.
     */
    private static long getMaxHeapSize(String s) {
        Matcher m = MAX_HEAP_PATTERN.matcher(s);
        long val = 0;
        if (m.find()) {
            val = Long.parseLong(m.group(1));
            if ("k".equalsIgnoreCase(m.group(2))) {
                val /= 1024;
            } else if ("g".equalsIgnoreCase(m.group(2))) {
                val *= 1024;
            }
        }
        return val;
    }
}
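
To experiment with the two size calculations in isolation, here is a minimal, self-contained sketch. The regex is an assumption standing in for MapReduceJobSuggestionContext.MAX_HEAP_PATTERN, which is not shown in this file, and the class name and numbers are illustrative only:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SpillMathDemo {

    // Assumed stand-in for MAX_HEAP_PATTERN; presumed to match settings such
    // as "-Xmx512m" or "-Xmx2g", with an optional unit suffix.
    private static final Pattern MAX_HEAP = Pattern.compile("-Xmx(\\d+)([kKmMgG])?");

    // Mirrors getMaxHeapSize() above: normalizes an -Xmx value to megabytes.
    static long maxHeapMB(String javaOpts) {
        Matcher m = MAX_HEAP.matcher(javaOpts);
        long val = 0;
        if (m.find()) {
            val = Long.parseLong(m.group(1));
            if ("k".equalsIgnoreCase(m.group(2))) {
                val /= 1024;
            } else if ("g".equalsIgnoreCase(m.group(2))) {
                val *= 1024;
            }
        }
        return val;
    }

    public static void main(String[] args) {
        final long ONE_MB = 1024L * 1024L;
        long minSpillBytes = 375 * ONE_MB; // pretend the context computed 375 MB

        // Round up to a multiple of 10 MB, as the processor does.
        long sortMB = (minSpillBytes / ONE_MB + 10) / 10 * 10;
        System.out.println("suggested io.sort.mb = " + sortMB); // prints 380

        long heapMB = maxHeapMB("-Xmx512m");
        if (heapMB < 3 * sortMB) {
            // Round 3x the sort buffer up to a whole number of GB.
            long expectedMB = (sortMB * 3 + 1024) / 1024 * 1024;
            System.out.println("suggested map heap = -Xmx" + expectedMB + "M"); // -Xmx2048M
        }
    }
}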