// Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.alibaba.wasp.master.balancer; import com.alibaba.wasp.EntityGroupInfo; import com.alibaba.wasp.ServerName; import com.alibaba.wasp.master.EntityGroupPlan; import com.google.common.collect.MinMaxPriorityQueue; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Random; import java.util.TreeMap; /** * Makes decisions about the placement and movement of EntityGroups across * FServers. * * <p> * Cluster-wide load balancing will occur only when there are no entityGroups in * transition and according to a fixed period of a time using * {@link #balanceCluster(java.util.Map)}. * * <p> * Inline entityGroup placement with {@link #immediateAssignment} can be used * when the Master needs to handle closed entityGroups that it currently does * not have a destination set for. This can happen during master failover. * * <p> * On cluster startup, bulk assignment can be used to determine locations for * all EntityGroups in a cluster. 
* * <p> * This classes produces plans for the {@link EntityGroupManager} to execute. */ public class DefaultLoadBalancer extends BaseLoadBalancer { private static final Log LOG = LogFactory.getLog(DefaultLoadBalancer.class); private static final Random RANDOM = new Random(System.currentTimeMillis()); private EntityGroupInfoComparator riComparator = new EntityGroupInfoComparator(); private EntityGroupPlan.EntityGroupPlanComparator rpComparator = new EntityGroupPlan.EntityGroupPlanComparator(); /** * Stores additional per-server information about the entityGroups added/removed * during the run of the balancing algorithm. * * For servers that shed entityGroups, we need to track which entityGroups we have * already shed. <b>nextEntityGroupForUnload</b> contains the index in the list of * entityGroups on the server that is the next to be shed. */ static class BalanceInfo { private final int nextEntityGroupForUnload; private int numEntityGroupsAdded; public BalanceInfo(int nextEntityGroupForUnload, int numEntityGroupsAdded) { this.nextEntityGroupForUnload = nextEntityGroupForUnload; this.numEntityGroupsAdded = numEntityGroupsAdded; } int getNextEntityGroupForUnload() { return nextEntityGroupForUnload; } int getNumEntityGroupsAdded() { return numEntityGroupsAdded; } void setNumEntityGroupsAdded(int numAdded) { this.numEntityGroupsAdded = numAdded; } } /** * Generate a global load balancing plan according to the specified map of * server information to the most loaded entityGroups of each server. * * The load balancing invariant is that all servers are within 1 entityGroup of the * average number of entityGroups per server. If the average is an integer number, * all servers will be balanced to the average. Otherwise, all servers will * have either floor(average) or ceiling(average) entityGroups. * * HBASE-3609 Modeled entityGroupsToMove using Guava's MinMaxPriorityQueue so that * we can fetch from both ends of the queue. 
At the beginning, we check * whether there was empty entityGroup server just discovered by Master. If so, we * alternately choose new / old entityGroups from head / tail of entityGroupsToMove, * respectively. This alternation avoids clustering young entityGroups on the newly * discovered entityGroup server. Otherwise, we choose new entityGroups from head of * entityGroupsToMove. * * Another improvement from HBASE-3609 is that we assign entityGroups from * entityGroupsToMove to underloaded servers in round-robin fashion. Previously one * underloaded server would be filled before we move onto the next underloaded * server, leading to clustering of young entityGroups. * * Finally, we randomly shuffle underloaded servers so that they receive * offloaded entityGroups relatively evenly across calls to balanceCluster(). * * The algorithm is currently implemented as such: * * <ol> * <li>Determine the two valid numbers of entityGroups each server should have, * <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average). * * <li>Iterate down the most loaded servers, shedding entityGroups from each so * each server hosts exactly <b>MAX</b> entityGroups. Stop once you reach a server * that already has <= <b>MAX</b> entityGroups. * <p> * Order the entityGroups to move from most recent to least. * * <li>Iterate down the least loaded servers, assigning entityGroups so each server * has exactly </b>MIN</b> entityGroups. Stop once you reach a server that already * has >= <b>MIN</b> entityGroups. * * EntityGroups being assigned to underloaded servers are those that were shed in * the previous step. It is possible that there were not enough entityGroups shed * to fill each underloaded server to <b>MIN</b>. If so we end up with a * number of entityGroups required to do so, <b>neededEntityGroups</b>. * * It is also possible that we were able to fill each underloaded but ended up * with entityGroups that were unassigned from overloaded servers but that still do * not have assignment. 
* * If neither of these conditions hold (no entityGroups needed to fill the * underloaded servers, no entityGroups leftover from overloaded servers), we are * done and return. Otherwise we handle these cases below. * * <li>If <b>neededEntityGroups</b> is non-zero (still have underloaded servers), * we iterate the most loaded servers again, shedding a single server from * each (this brings them from having <b>MAX</b> entityGroups to having <b>MIN</b> * entityGroups). * * <li>We now definitely have more entityGroups that need assignment, either from * the previous step or from the original shedding from overloaded servers. * Iterate the least loaded servers filling each to <b>MIN</b>. * * <li>If we still have more entityGroups that need assignment, again iterate the * least loaded servers, this time giving each one (filling them to * </b>MAX</b>) until we run out. * * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> entityGroups. * * In addition, any server hosting >= <b>MAX</b> entityGroups is guaranteed to * end up with <b>MAX</b> entityGroups at the end of the balancing. This ensures * the minimal number of entityGroups possible are moved. * </ol> * * TODO: We can at-most reassign the number of entityGroups away from a particular * server to be how many they report as most loaded. Should we just keep all * assignment in memory? Any objections? Does this mean we need HeapSize on * HMaster? Or just careful monitor? 
(current thinking is we will hold all * assignments in memory) * * @param clusterState Map of entityGroupservers and their load/entityGroup information * to a list of their most loaded entityGroups * @return a list of entityGroups to be moved, including source and destination, or * null if cluster is already balanced */ public List<EntityGroupPlan> balanceCluster(Map<ServerName, List<EntityGroupInfo>> clusterMap) { boolean emptyFServerPresent = false; long startTime = System.currentTimeMillis(); ClusterLoadState cs = new ClusterLoadState(clusterMap); int numServers = cs.getNumServers(); if (numServers == 0) { LOG.debug("numServers=0 so skipping load balancing"); return null; } NavigableMap<ServerAndLoad, List<EntityGroupInfo>> serversByLoad = cs.getServersByLoad(); int numEntityGroups = cs.getNumEntityGroups(); if (!this.needsBalance(cs)) { // Skipped because no server outside (min,max) range float average = cs.getLoadAverage(); // for logging LOG.info("Skipping load balancing because balanced cluster; " + "servers=" + numServers + " " + "entityGroups=" + numEntityGroups + " average=" + average + " " + "mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad()); return null; } int min = numEntityGroups / numServers; int max = numEntityGroups % numServers == 0 ? min : min + 1; // Using to check balance result. 
StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numEntityGroups=").append(numEntityGroups) .append(", numServers=").append(numServers).append(", max=").append(max).append(", min=") .append(min); LOG.debug(strBalanceParam.toString()); // Balance the cluster // TODO: Look at data block locality or a more complex load to do this MinMaxPriorityQueue<EntityGroupPlan> entityGroupsToMove = MinMaxPriorityQueue.orderedBy(rpComparator) .create(); List<EntityGroupPlan> entityGroupsToReturn = new ArrayList<EntityGroupPlan>(); // Walk down most loaded, pruning each to the max int serversOverloaded = 0; // flag used to fetch entityGroups from head and tail of list, alternately boolean fetchFromTail = false; Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>(); for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap().entrySet()) { ServerAndLoad sal = server.getKey(); int entityGroupCount = sal.getLoad(); if (entityGroupCount <= max) { serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0)); break; } serversOverloaded++; List<EntityGroupInfo> entityGroups = server.getValue(); int numToOffload = Math.min(entityGroupCount - max, entityGroups.size()); // account for the out-of-band entityGroups which were assigned to this server // after some other entityGroup server crashed Collections.sort(entityGroups, riComparator); int numTaken = 0; for (int i = 0; i <= numToOffload;) { EntityGroupInfo egInfo = entityGroups.get(i); // fetch from head if (fetchFromTail) { egInfo = entityGroups.get(entityGroups.size() - 1 - i); } i++; entityGroupsToMove.add(new EntityGroupPlan(egInfo, sal.getServerName(), null)); numTaken++; if (numTaken >= numToOffload) break; // fetch in alternate order if there is new entityGroup server if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } } serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * 
numTaken)); } int totalNumMoved = entityGroupsToMove.size(); // Walk down least loaded, filling each to the min int neededEntityGroups = 0; // number of entityGroups needed to bring all up to min fetchFromTail = false; Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>(); for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) { int entityGroupCount = server.getKey().getLoad(); if (entityGroupCount >= min) { break; } underloadedServers.put(server.getKey().getServerName(), min - entityGroupCount); } // number of servers that get new entityGroups int serversUnderloaded = underloadedServers.size(); int incr = 1; List<ServerName> sns = Arrays .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded])); Collections.shuffle(sns, RANDOM); while (entityGroupsToMove.size() > 0) { int cnt = 0; int i = incr > 0 ? 0 : underloadedServers.size() - 1; for (; i >= 0 && i < underloadedServers.size(); i += incr) { if (entityGroupsToMove.isEmpty()) break; ServerName si = sns.get(i); int numToTake = underloadedServers.get(si); if (numToTake == 0) continue; addEntityGroupPlan(entityGroupsToMove, fetchFromTail, si, entityGroupsToReturn); if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } underloadedServers.put(si, numToTake - 1); cnt++; BalanceInfo bi = serverBalanceInfo.get(si); if (bi == null) { bi = new BalanceInfo(0, 0); serverBalanceInfo.put(si, bi); } bi.setNumEntityGroupsAdded(bi.getNumEntityGroupsAdded() + 1); } if (cnt == 0) break; // iterates underloadedServers in the other direction incr = -incr; } for (Integer i : underloadedServers.values()) { // If we still want to take some, increment needed neededEntityGroups += i; } // If none needed to fill all to min and none left to drain all to max, // we are done if (neededEntityGroups == 0 && entityGroupsToMove.isEmpty()) { long endTime = System.currentTimeMillis(); LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. 
" + "Moving " + totalNumMoved + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return entityGroupsToReturn; } // Need to do a second pass. // Either more entityGroups to assign out or servers that are still underloaded // If we need more to fill min, grab one from each most loaded until enough if (neededEntityGroups != 0) { // Walk down most loaded, grabbing one from each until we get enough for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap() .entrySet()) { BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); int idx = balanceInfo == null ? 0 : balanceInfo.getNextEntityGroupForUnload(); if (idx >= server.getValue().size()) break; EntityGroupInfo entityGroup = server.getValue().get(idx); entityGroupsToMove.add(new EntityGroupPlan(entityGroup, server.getKey().getServerName(), null)); totalNumMoved++; if (--neededEntityGroups == 0) { // No more entityGroups needed, done shedding break; } } } // Now we have a set of entityGroups that must be all assigned out // Assign each underloaded up to the min, then if leftovers, assign to max // Walk down least loaded, assigning to each to fill up to min for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) { int entityGroupCount = server.getKey().getLoad(); if (entityGroupCount >= min) break; BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); if (balanceInfo != null) { entityGroupCount += balanceInfo.getNumEntityGroupsAdded(); } if (entityGroupCount >= min) { continue; } int numToTake = min - entityGroupCount; int numTaken = 0; while (numTaken < numToTake && 0 < entityGroupsToMove.size()) { addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(), entityGroupsToReturn); numTaken++; if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } } } // If we still have entityGroups to dish out, 
assign underloaded to max if (0 < entityGroupsToMove.size()) { for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) { int entityGroupCount = server.getKey().getLoad(); if (entityGroupCount >= max) { break; } addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(), entityGroupsToReturn); if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } if (entityGroupsToMove.isEmpty()) { break; } } } long endTime = System.currentTimeMillis(); if (!entityGroupsToMove.isEmpty() || neededEntityGroups != 0) { // Emit data so can diagnose how balancer went astray. LOG.warn("entityGroupsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded); StringBuilder sb = new StringBuilder(); for (Map.Entry<ServerName, List<EntityGroupInfo>> e : clusterMap.entrySet()) { if (sb.length() > 0) sb.append(", "); sb.append(e.getKey().toString()); sb.append(" "); sb.append(e.getValue().size()); } LOG.warn("Input " + sb.toString()); } // All done! LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return entityGroupsToReturn; } /** * Add a entityGroup from the head or tail to the List of entityGroups to return. */ private void addEntityGroupPlan(final MinMaxPriorityQueue<EntityGroupPlan> entityGroupsToMove, final boolean fetchFromTail, final ServerName sn, List<EntityGroupPlan> entityGroupsToReturn) { EntityGroupPlan rp = null; if (!fetchFromTail) rp = entityGroupsToMove.remove(); else rp = entityGroupsToMove.removeLast(); rp.setDestination(sn); entityGroupsToReturn.add(rp); } }