org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceTaskNumProcessor.java Source code

Introduction

Here is the source code for org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceTaskNumProcessor.java, an Apache Eagle job analyzer that inspects a MapReduce job's counters and timing statistics and suggests better map and reduce task counts.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.eagle.jpm.analyzer.mr.suggestion;

import org.apache.commons.io.FileUtils;
import org.apache.eagle.jpm.analyzer.Processor;
import org.apache.eagle.jpm.analyzer.meta.model.MapReduceAnalyzerEntity;
import org.apache.eagle.jpm.analyzer.publisher.Result;
import org.apache.eagle.jpm.util.jobcounter.JobCounters;

import java.util.ArrayList;
import java.util.List;

import static org.apache.hadoop.mapreduce.MRJobConfig.NUM_REDUCES;

public class MapReduceTaskNumProcessor implements Processor<MapReduceAnalyzerEntity> {
    private static final String[] SIZE_UNITS = { "B", "K", "M", "G", "T", "P" };
    private MapReduceJobSuggestionContext context;

    public MapReduceTaskNumProcessor(MapReduceJobSuggestionContext context) {
        this.context = context;
    }

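    /**
     * Runs the reduce-side and map-side analyses and, if either produced
     * advice, wraps the combined text in an INFO-level TASK_NUMBER result;
     * returns null when there is nothing to suggest.
     */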
    @Override
    public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
        StringBuilder sb = new StringBuilder();
        List<String> optSettings = new ArrayList<>();
        try {
            sb.append(analyzeReduceTaskNum(optSettings));
            sb.append(analyzeMapTaskNum(optSettings));

            if (sb.length() > 0) {
                return new Result.ProcessorResult(Result.RuleType.TASK_NUMBER, Result.ResultLevel.INFO,
                        sb.toString(), optSettings);
            }
        } catch (NullPointerException e) {
            // A failed job may have no counters, so swallow the NPE and return no suggestion.
        }
        return null;
    }

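    /**
     * Checks whether the job's reducers each handle too little work (e.g. under
     * 256 MB of shuffle input and under 5 minutes of shuffle-plus-reduce time)
     * or too much (e.g. over 10 GB of input or over an hour), and if so proposes
     * a new reducer count via getReduceNum.
     */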
    private String analyzeReduceTaskNum(List<String> optSettings) {
        StringBuilder sb = new StringBuilder();

        long numReduces = context.getNumReduces();
        if (numReduces > 0) {
            long avgReduceTime = context.getAvgReduceTimeInSec();
            long avgShuffleTime = context.getAvgShuffleTimeInSec();
            long avgShuffleBytes = context.getJob().getReduceCounters()
                    .getCounterValue(JobCounters.CounterName.REDUCE_SHUFFLE_BYTES) / numReduces;
            long avgReduceOutput = context.getJob().getReduceCounters()
                    .getCounterValue(JobCounters.CounterName.HDFS_BYTES_WRITTEN) / numReduces;
            long avgReduceTotalTime = avgShuffleTime + avgReduceTime;

            long suggestReduces = 0;
            StringBuilder tmpsb = new StringBuilder();

            String avgShuffleDisplaySize = bytesToHumanReadable(avgShuffleBytes);
            if (avgShuffleBytes < 256 * FileUtils.ONE_MB && avgReduceTotalTime < 300
                    && avgReduceOutput < 256 * FileUtils.ONE_MB && numReduces > 1) {
                tmpsb.append("average reduce input is only ").append(avgShuffleDisplaySize).append(", ");
                suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
            } else if (avgShuffleBytes > 10 * FileUtils.ONE_GB && avgReduceTotalTime > 1800) {
                tmpsb.append("average reduce input is ").append(avgShuffleDisplaySize).append(", ");
                suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
            }

            if (avgReduceTotalTime < 60 && numReduces > 1) {
                tmpsb.append("average reduce time is only ").append(avgReduceTotalTime).append(" seconds, ");
                if (suggestReduces == 0) {
                    suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
                }
            } else if (avgReduceTotalTime > 3600 && avgReduceTime > 1800) {
                tmpsb.append("average reduce time is ").append(avgReduceTotalTime).append(" seconds, ");
                if (suggestReduces == 0) {
                    suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
                }
            }

            String avgReduceOutputDisplaySize = bytesToHumanReadable(avgReduceOutput);
            if (avgReduceOutput < 10 * FileUtils.ONE_MB && avgReduceTime < 300
                    && avgShuffleBytes < 2 * FileUtils.ONE_GB && numReduces > 1) {
                tmpsb.append("average reduce output is only ").append(avgReduceOutputDisplaySize).append(", ");
                if (suggestReduces == 0) {
                    suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
                }
            } else if (avgReduceOutput > 10 * FileUtils.ONE_GB && avgReduceTime > 1800) {
                tmpsb.append("average reduce output is ").append(avgReduceOutputDisplaySize).append(", ");
                if (suggestReduces == 0) {
                    suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
                }
            }

            if (suggestReduces > 0) {
                sb.append("Best practice: ").append(tmpsb.toString()).append("please consider ");
                if (suggestReduces > numReduces) {
                    sb.append("increasing the ");
                } else {
                    sb.append("decreasing the ");
                }
                String setting = String.format("-D%s=%s", NUM_REDUCES, suggestReduces);
                sb.append("reducer number. You could try ").append(setting).append("\n");
                optSettings.add(setting);
            }
        }
        return sb.toString();
    }

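    /**
     * Checks whether the job's mappers are too small (reading under 5 MB in
     * under 30 seconds, or finishing in under 10 seconds: merge input files)
     * or too large (over 1 GB of input, or over 10 minutes on smaller input:
     * use splittable compression, a container file format, or a smaller
     * block size).
     */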
    private String analyzeMapTaskNum(List<String> optSettings) {
        StringBuilder sb = new StringBuilder();

        long numMaps = context.getNumMaps();
        if (numMaps <= 0) {
            // Guard against division by zero for jobs that report no map tasks.
            return sb.toString();
        }
        long avgMapTime = context.getAvgMapTimeInSec();
        long avgMapInput = context.getJob().getMapCounters()
                .getCounterValue(JobCounters.CounterName.HDFS_BYTES_READ) / numMaps;
        String avgMapInputDisplaySize = bytesToHumanReadable(avgMapInput);

        if (avgMapInput < 5 * FileUtils.ONE_MB && avgMapTime < 30 && numMaps > 1) {
            sb.append("Best practice: average map input is only ").append(avgMapInputDisplaySize);
            sb.append(". Please reduce the number of mappers by merging input files.\n");
        } else if (avgMapInput > FileUtils.ONE_GB) {
            sb.append("Best practice: average map input is ").append(avgMapInputDisplaySize);
            sb.append(
                    ". Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
        }

        if (avgMapTime < 10 && numMaps > 1) {
            sb.append("Best practice: average map time is only ").append(avgMapTime);
            sb.append(
                    " seconds. Please reduce the number of mappers by merging input files or by using a larger block size.\n");
        } else if (avgMapTime > 600 && avgMapInput < FileUtils.ONE_GB) {
            sb.append("Best practice: average map time is ").append(avgMapTime);
            sb.append(
                    " seconds. Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
        }

        return sb.toString();
    }

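    /**
     * Suggests a reducer count as the largest of three per-reducer targets:
     * roughly 3 GB of shuffle input, roughly 2 GB of output, or roughly
     * 10 minutes of runtime per reducer; never suggests fewer than 1.
     */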
    private long getReduceNum(long avgInputBytes, long avgOutputBytes, long avgTime) {
        long newReduceNum = 1;
        long tmpReduceNum;

        long numReduces = context.getNumReduces();
        tmpReduceNum = avgInputBytes * numReduces / (3 * FileUtils.ONE_GB);
        if (tmpReduceNum > newReduceNum) {
            newReduceNum = tmpReduceNum;
        }

        tmpReduceNum = avgOutputBytes * numReduces / (2 * FileUtils.ONE_GB);
        if (tmpReduceNum > newReduceNum) {
            newReduceNum = tmpReduceNum;
        }

        tmpReduceNum = avgTime * numReduces / (10 * 60);
        if (tmpReduceNum > newReduceNum) {
            newReduceNum = tmpReduceNum;
        }

        return newReduceNum;
    }

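    /**
     * Formats a byte count using 1024-based units: the integer part with its
     * unit, then the first three decimal places with the next smaller unit.
     * For example, 1.5 GB renders as "1G500M" (the "500" is thousandths of a
     * gigabyte, not an exact megabyte count).
     */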
    private static String bytesToHumanReadable(long bytes) {
        double val = bytes;
        int idx = 0;
        while (val >= 1024) {
            val /= 1024.0;
            idx += 1;
        }
        StringBuilder sb = new StringBuilder();
        sb.append((int) Math.floor(val));
        sb.append(SIZE_UNITS[idx]);
        int tmp = (int) (1000 * val) % 1000;
        if (idx >= 1 && tmp > 0) {
            sb.append(tmp);
            sb.append(SIZE_UNITS[idx - 1]);
        }
        return sb.toString();
    }

}
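
Example

The core of the tuning logic is the getReduceNum heuristic: it returns the largest reducer count implied by three per-reducer targets (roughly 3 GB of shuffle input, 2 GB of output, and 10 minutes of runtime). The standalone sketch below is not part of the Eagle source and uses made-up job statistics; only FileUtils and the "mapreduce.job.reduces" key (the value of MRJobConfig.NUM_REDUCES) are taken from the listing above.

import org.apache.commons.io.FileUtils;

public class ReduceNumExample {
    public static void main(String[] args) {
        // Hypothetical per-reducer statistics for a job currently running 10 reducers.
        long numReduces = 10;
        long avgInputBytes = 9 * FileUtils.ONE_GB;  // avg shuffle bytes per reducer
        long avgOutputBytes = FileUtils.ONE_GB;     // avg HDFS bytes written per reducer
        long avgTimeSec = 2400;                     // avg reduce time in seconds

        long suggested = 1;
        // Rule 1: roughly 3 GB of shuffle input per reducer -> 90 GB / 3 GB = 30.
        suggested = Math.max(suggested, avgInputBytes * numReduces / (3 * FileUtils.ONE_GB));
        // Rule 2: roughly 2 GB of output per reducer -> 10 GB / 2 GB = 5.
        suggested = Math.max(suggested, avgOutputBytes * numReduces / (2 * FileUtils.ONE_GB));
        // Rule 3: roughly 10 minutes of work per reducer -> 24000 s / 600 s = 40.
        suggested = Math.max(suggested, avgTimeSec * numReduces / (10 * 60));

        // The largest of the three bounds wins.
        System.out.println("-Dmapreduce.job.reduces=" + suggested);
    }
}

With these numbers the sketch prints -Dmapreduce.job.reduces=40, the same setting the processor would add to optSettings for such a job.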