Java tutorial
/** * Copyright 2010 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.master; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.lang.mutable.MutableBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; import 
org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType; import org.apache.hadoop.hbase.executor.RegionTransitionEventData; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.zookeeper.LegacyRootZNodeUpdater; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper; import org.apache.hadoop.io.Text; import com.google.common.base.Preconditions; /** * Class to manage assigning regions to servers, state of root and meta, etc. */ public class RegionManager { protected static final Log LOG = LogFactory.getLog(RegionManager.class); private final AtomicReference<HServerInfo> rootRegionLocation = new AtomicReference<HServerInfo>(null); private final RootScanner rootScannerThread; final MetaScanner metaScannerThread; /** Set by root scanner to indicate the number of meta regions */ private final AtomicInteger numberOfMetaRegions = new AtomicInteger(); /** These are the online meta regions */ private final NavigableMap<byte[], MetaRegion> onlineMetaRegions = new ConcurrentSkipListMap<byte[], MetaRegion>( Bytes.BYTES_COMPARATOR); private final NavigableMap<byte[], MetaRegion> metaRegionLocationsBeforeScan = new TreeMap<byte[], MetaRegion>( Bytes.BYTES_COMPARATOR); private static final byte[] OVERLOADED = Bytes.toBytes("Overloaded"); private static final byte[] META_REGION_PREFIX = Bytes.toBytes(".META.,"); private final AssignmentManager assignmentManager; /** * Map key -> tableName, value -> ThrottledRegionReopener * An entry is created in the map before an alter operation is performed on the * table. It is cleared when all the regions have reopened. 
*/ private final Map<String, ThrottledRegionReopener> tablesReopeningRegions = new ConcurrentHashMap<String, ThrottledRegionReopener>(); /** * Map of region name to RegionState for regions that are in transition such as * * unassigned -> pendingOpen -> open * closing -> pendingClose -> closed; if (closed && !offline) -> unassigned * * At the end of a transition, removeRegion is used to remove the region from * the map (since it is no longer in transition) * * Note: Needs to be SortedMap so we can specify a comparator * * @see RegionState inner-class below */ final SortedMap<String, RegionState> regionsInTransition = Collections .synchronizedSortedMap(new TreeMap<String, RegionState>()); /** Serves as a cache for locating where a particular region is open. * Currently being used to detect legitmate duplicate assignments from * spurious ones, that may seem to occur if a ZK notification is received * twice. * * maps regionName --> serverName * * Note: This is a temporary hack. Should be safe to remove once we get * rid of duplicate notifications from ZK. */ final ConcurrentMap<String, String> regionLocationHintToDetectDupAssignment = new ConcurrentHashMap<String, String>(); // regions in transition are also recorded in ZK using the zk wrapper final ZooKeeperWrapper zkWrapper; // How many regions to assign a server at a time. private final int maxAssignInOneGo; final HMaster master; private final LoadBalancer loadBalancer; /** Set of regions to split. */ private final SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> regionsToSplit = Collections .synchronizedSortedMap(new TreeMap<byte[], Pair<HRegionInfo, HServerAddress>>(Bytes.BYTES_COMPARATOR)); /** Set of regions to compact. */ private final SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> regionsToCompact = Collections .synchronizedSortedMap(new TreeMap<byte[], Pair<HRegionInfo, HServerAddress>>(Bytes.BYTES_COMPARATOR)); /** Set of column families to compact within a region. 
This map is a double SortedMap, first indexed on regionName and then indexed on column family name. This is done to facilitate the fact that we might want to perform a certain action on only a column family within a region. */ private final SortedMap<byte[], SortedMap<byte[], Pair<HRegionInfo, HServerAddress>>> cfsToCompact = Collections .synchronizedSortedMap(new TreeMap<byte[], SortedMap<byte[], Pair<HRegionInfo, HServerAddress>>>( Bytes.BYTES_COMPARATOR)); /** Set of column families to major compact within a region. This map is a double SortedMap, first indexed on regionName and then indexed on column family name. This is done to facilitate the fact that we might want to perform a certain action on only a column family within a region. */ private final SortedMap<byte[], SortedMap<byte[], Pair<HRegionInfo, HServerAddress>>> cfsToMajorCompact = Collections .synchronizedSortedMap(new TreeMap<byte[], SortedMap<byte[], Pair<HRegionInfo, HServerAddress>>>( Bytes.BYTES_COMPARATOR)); /** Set of regions to major compact. */ private final SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> regionsToMajorCompact = Collections .synchronizedSortedMap(new TreeMap<byte[], Pair<HRegionInfo, HServerAddress>>(Bytes.BYTES_COMPARATOR)); /** Set of regions to flush. */ private final SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> regionsToFlush = Collections .synchronizedSortedMap(new TreeMap<byte[], Pair<HRegionInfo, HServerAddress>>(Bytes.BYTES_COMPARATOR)); private final int zooKeeperNumRetries; private final int zooKeeperPause; /** * Set of region servers which send heart beat in the first period of time * during the master boots. Hold the best locality regions for these * region servers. 
/**
 * Host names of region servers that checked in while the master was still
 * assigning regions by locality after startup. A region whose preferred host
 * is in this set is held back for that host instead of being handed to
 * another server.
 */
private Set<String> quickStartRegionServerSet = new HashSet<String>();

/** True once {@link #stopScanners()} has stopped the root and meta scanners. */
private boolean stoppedScanners = false;

/** Thread started as a daemon by {@link #start()}. */
private LegacyRootZNodeUpdater legacyRootZNodeUpdater;

/**
 * Wires up the manager from the master's configuration. Nothing is started
 * here; see {@link #start()}.
 *
 * @param master the owning master
 * @throws IOException declared by the constructor; which collaborator raises
 *         it is not visible from this class
 */
RegionManager(HMaster master) throws IOException {
  final Configuration conf = master.getConfiguration();
  this.master = master;
  this.zkWrapper = ZooKeeperWrapper.getInstance(conf, master.getZKWrapperName());
  this.maxAssignInOneGo = conf.getInt("hbase.regions.percheckin", 10);
  // The balancer flavour follows the favored-nodes switch on the master.
  this.loadBalancer = master.shouldAssignRegionsWithFavoredNodes()
      ? new AssignmentLoadBalancer()
      : new DefaultLoadBalancer();
  this.assignmentManager = new AssignmentManager(master);

  // Scanner for the root region, then the scanner for the meta table.
  this.rootScannerThread = new RootScanner(master);
  this.metaScannerThread = new MetaScanner(master);

  this.zooKeeperNumRetries =
      conf.getInt(HConstants.ZOOKEEPER_RETRIES, HConstants.DEFAULT_ZOOKEEPER_RETRIES);
  this.zooKeeperPause =
      conf.getInt(HConstants.ZOOKEEPER_PAUSE, HConstants.DEFAULT_ZOOKEEPER_PAUSE);
  this.legacyRootZNodeUpdater =
      new LegacyRootZNodeUpdater(zkWrapper, master, rootRegionLocation);
}

/** @return the load balancer chosen at construction time */
public LoadBalancer getLoadBalancer() {
  return loadBalancer;
}

/**
 * Starts the assignment manager, then launches the root scanner, the meta
 * scanner and the legacy root znode updater as daemon threads.
 */
void start() {
  assignmentManager.start();
  Threads.setDaemonThreadRunning(rootScannerThread, "RegionManager.rootScanner");
  Threads.setDaemonThreadRunning(metaScannerThread, "RegionManager.metaScanner");
  // No explicit thread name is supplied for the updater.
  Threads.setDaemonThreadRunning(legacyRootZNodeUpdater, null);
}

/** @return the assignment manager created by the constructor */
public AssignmentManager getAssignmentManager() {
  return assignmentManager;
}

/**
 * Forgets the current -ROOT- location, wakes every thread waiting on the
 * location reference, and drops -ROOT- from regionsInTransition. Does not
 * schedule a reassignment; see {@link #reassignRootRegion()}.
 */
void unsetRootRegion() {
  final String rootName = HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString();
  synchronized (regionsInTransition) {
    // The location reference doubles as the monitor waiters block on.
    synchronized (rootRegionLocation) {
      rootRegionLocation.set(null);
      rootRegionLocation.notifyAll();
    }
    regionsInTransition.remove(rootName);
    LOG.info("-ROOT- region unset (but not set to be reassigned)");
  }
}

/**
 * Unsets -ROOT- and, unless a cluster shutdown has been requested, writes an
 * UNASSIGNED znode for it and puts it back into regionsInTransition in the
 * UNASSIGNED state so that it gets handed to a server again.
 */
void reassignRootRegion() {
  unsetRootRegion();
  if (master.isClusterShutdownRequested()) {
    return;
  }
  synchronized (regionsInTransition) {
    final String rootName = HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString();
    byte[] eventData = null;
    try {
      eventData = Writables.getBytes(
          new RegionTransitionEventData(HBaseEventType.M2ZK_REGION_OFFLINE, HMaster.MASTER));
    } catch (IOException e) {
      // Serialization failure is only logged; the znode is still written with null data.
      LOG.error("Error creating event data for " + HBaseEventType.M2ZK_REGION_OFFLINE, e);
    }
    zkWrapper.createOrUpdateUnassignedRegion(
        HRegionInfo.ROOT_REGIONINFO.getEncodedName(), eventData);
    LOG.debug("Created UNASSIGNED zNode " + rootName + " in state "
        + HBaseEventType.M2ZK_REGION_OFFLINE);
    regionsInTransition.put(rootName,
        new RegionState(HRegionInfo.ROOT_REGIONINFO, RegionState.State.UNASSIGNED));
    LOG.info("ROOT inserted into regionsInTransition");
  }
}

/**
 * Entry point for handing regions to a server that has just checked in.
 * Does nothing until startup log splitting has finished; afterwards it
 * dispatches to the favored-nodes or the plain assignment path.
 *
 * The original notes state that the caller (ServerManager.processMsgs)
 * already holds the RegionManager monitor; that cannot be confirmed from
 * this method alone.
 *
 * @param info the server that checked in
 * @param mostLoadedRegions regions reported by that server as most loaded
 * @param returnMsgs messages to send back to the server; appended to
 */
void assignRegions(HServerInfo info, HRegionInfo[] mostLoadedRegions,
    ArrayList<HMsg> returnMsgs) {
  if (!this.master.getIsSplitLogAfterStartupDone()) {
    // Startup log splitting is still running; regions are assigned on a
    // later check-in of the region server.
    return;
  }
  if (this.master.shouldAssignRegionsWithFavoredNodes()) {
    assignRegionsWithFavoredNodes(info, mostLoadedRegions, returnMsgs);
  } else {
    assignRegionsWithoutFavoredNodes(info, mostLoadedRegions, returnMsgs);
  }
}

/**
 * Favored-nodes path: gives the server every region currently awaiting
 * assignment for it, or runs load balancing when nothing is waiting, the
 * region server operation queue is empty and the balancer is enabled.
 */
private void assignRegionsWithFavoredNodes(HServerInfo regionServer,
    HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
  final Set<RegionState> pending = regionsAwaitingAssignment(regionServer);
  final boolean idle = pending.isEmpty()
      && master.getRegionServerOperationQueue().isEmpty()
      && !master.isLoadBalancerDisabled();
  if (idle) {
    this.loadBalancer.loadBalancing(regionServer, mostLoadedRegions, returnMsgs);
    return;
  }
  assignRegionsToOneServer(pending, regionServer, returnMsgs);
}

/**
 * @return true if there is a single regionserver online, or if the server
 *         manager reports blacklisted servers (unit tests only). In either
 *         case the restrictions on co-locating .META./-ROOT- are lifted;
 *         otherwise assignment could keep targeting blacklisted servers.
 */
private boolean isSingleRegionServer() {
  if (master.numServers() == 1) {
    return true;
  }
  return master.getServerManager().hasBlacklistedServersInTest();
}

/**
 * Plain (non favored-nodes) path. While a preferred region-to-server mapping
 * exists and the master is still inside its preferred-assignment period,
 * regions are handed out by locality; once that period has passed the
 * mapping and the quick-start server set are discarded.
 */
private void assignRegionsWithoutFavoredNodes(HServerInfo info,
    HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
  final HServerLoad serverLoad = info.getLoad();
  final boolean singleServer = isSingleRegionServer();

  boolean holdForBestServer = false;
  boolean byLocality = false;
  // Locality only matters while the preferred mapping is still around.
  if (this.master.getPreferredRegionToRegionServerMapping() != null) {
    final long uptime = System.currentTimeMillis() - this.master.getMasterStartupTime();
    holdForBestServer = uptime < this.master.getHoldRegionForBestLocalityPeriod();
    byLocality = uptime < this.master.getApplyPreferredAssignmentPeriod();
    if (!byLocality) {
      // Past the preferred-assignment period: drop both structures so the
      // locality check is skipped from now on.
      quickStartRegionServerSet = null;
      this.master.clearPreferredRegionToRegionServerMapping();
    }
  }

  if (byLocality) {
    // Remember this host as one that checked in during the locality window.
    quickStartRegionServerSet.add(info.getHostname());
  }

  // Set to true by regionsAwaitingAssignment when the server has regions
  // reserved for it; those are then all assigned to it directly.
  final MutableBoolean preferred = new MutableBoolean(false);
  final Set<RegionState> candidates = regionsAwaitingAssignment(info.getServerAddress(),
      singleServer, preferred, byLocality, holdForBestServer, quickStartRegionServerSet);

  if (!candidates.isEmpty()) {
    if (singleServer || byLocality || preferred.booleanValue()) {
      // Only one server, locality assignment, or reserved regions: give it everything.
      assignRegionsToOneServer(candidates, info, returnMsgs);
    } else {
      // Otherwise share the candidates out according to the cluster load.
      assignRegionsToMultipleServers(serverLoad, candidates, info, returnMsgs);
    }
    return;
  }

  // Nothing is waiting to be assigned: balance, unless locality is in force.
  if (!byLocality
      && master.getRegionServerOperationQueue().isEmpty()
      && !master.isLoadBalancerDisabled()) {
    this.loadBalancer.loadBalancing(info, mostLoadedRegions, returnMsgs);
  }
}
St.Ack 09/30/2009 * @param thisServersLoad * @param regionsToAssign * @param info * @param returnMsgs */ private void assignRegionsToMultipleServers(final HServerLoad thisServersLoad, final Set<RegionState> regionsToAssign, final HServerInfo info, final ArrayList<HMsg> returnMsgs) { boolean isMetaAssign = false; for (RegionState s : regionsToAssign) { if (s.getRegionInfo().isMetaRegion()) isMetaAssign = true; } int nRegionsToAssign = regionsToAssign.size(); int otherServersRegionsCount = regionsToGiveOtherServers(nRegionsToAssign, thisServersLoad); nRegionsToAssign -= otherServersRegionsCount; if (nRegionsToAssign > 0 || isMetaAssign) { LOG.debug("Assigning for " + info + ": total nregions to assign=" + nRegionsToAssign + ", regions to give other servers than this=" + otherServersRegionsCount + ", isMetaAssign=" + isMetaAssign); // See how many we can assign before this server becomes more heavily // loaded than the next most heavily loaded server. HServerLoad heavierLoad = new HServerLoad(); int nservers = computeNextHeaviestLoad(thisServersLoad, heavierLoad); int nregions = 0; // Advance past any less-loaded servers for (HServerLoad load = new HServerLoad(thisServersLoad); load.compareTo(heavierLoad) <= 0 && nregions < nRegionsToAssign; load .setNumberOfRegions(load.getNumberOfRegions() + 1), nregions++) { // continue; } if (nregions < nRegionsToAssign) { // There are some more heavily loaded servers // but we can't assign all the regions to this server. if (nservers > 0) { // There are other servers that can share the load. // Split regions that need assignment across the servers. nregions = (int) Math.ceil((1.0 * nRegionsToAssign) / (1.0 * nservers)); } else { // No other servers with same load. 
// Split regions over all available servers nregions = (int) Math .ceil((1.0 * nRegionsToAssign) / (1.0 * master.getServerManager().numServers())); } } else { // Assign all regions to this server nregions = nRegionsToAssign; } LOG.debug("Assigning " + info + " " + nregions + " regions"); assignRegions(regionsToAssign, nregions, info, returnMsgs); } } /* * Assign <code>nregions</code> regions. * @param regionsToAssign * @param nregions * @param info * @param returnMsgs */ private void assignRegions(final Set<RegionState> regionsToAssign, final int nregions, final HServerInfo info, final ArrayList<HMsg> returnMsgs) { int count = nregions; if (count > this.maxAssignInOneGo) { count = this.maxAssignInOneGo; } for (RegionState s : regionsToAssign) { doRegionAssignment(s, info, returnMsgs); if (--count <= 0) { break; } } } /* * Assign all to the only server. An unlikely case but still possible. * Note that no synchronization is needed on regionsInTransition while * iterating on it because the only caller is assignRegions whose caller owns * the monitor for RegionManager * * @param regionsToAssign * @param serverName * @param returnMsgs */ private void assignRegionsToOneServer(final Set<RegionState> regionsToAssign, final HServerInfo info, final ArrayList<HMsg> returnMsgs) { for (RegionState s : regionsToAssign) { doRegionAssignment(s, info, returnMsgs); } } /* * Do single region assignment. 
* @param rs * @param sinfo * @param returnMsgs */ private void doRegionAssignment(final RegionState rs, final HServerInfo sinfo, final ArrayList<HMsg> returnMsgs) { String regionName = rs.getRegionInfo().getRegionNameAsString(); LOG.info("Assigning region " + regionName + " to " + sinfo.getServerName()); rs.setPendingOpen(sinfo.getServerName()); synchronized (this.regionsInTransition) { byte[] data = null; try { data = Writables.getBytes( new RegionTransitionEventData(HBaseEventType.M2ZK_REGION_OFFLINE, HMaster.MASTER)); } catch (IOException e) { LOG.error("Error creating event data for " + HBaseEventType.M2ZK_REGION_OFFLINE, e); } zkWrapper.createOrUpdateUnassignedRegion(rs.getRegionInfo().getEncodedName(), data); LOG.debug("Created UNASSIGNED zNode " + regionName + " in state " + HBaseEventType.M2ZK_REGION_OFFLINE); this.regionsInTransition.put(regionName, rs); } if (assignmentManager.hasAssignmentFromPlan(rs.getRegionInfo())) { String favoredNodes = RegionPlacement .getFavoredNodes(assignmentManager.getAssignmentFromPlan(rs.regionInfo)); returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, rs.getRegionInfo(), favoredNodes.getBytes())); } else { returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, rs.getRegionInfo())); } // Now that we have told the server to open the region. Clean up the assignment plan. assignmentManager.removeTransientAssignment(sinfo.getServerAddress(), rs.regionInfo); } /* * @param nRegionsToAssign * @param thisServersLoad * @return How many regions should go to servers other than this one; i.e. * more lightly loaded servers */ private int regionsToGiveOtherServers(final int numUnassignedRegions, final HServerLoad thisServersLoad) { if (master.getServerManager().hasBlacklistedServersInTest()) { // For unit testing. Otherwise, we will always think we should give regions to blacklisted // servers, but will not actually assign any. 
return 0; } SortedMap<HServerLoad, Collection<String>> lightServers = master.getServerManager().getServersToLoad() .getLightServers(thisServersLoad); // Examine the list of servers that are more lightly loaded than this one. // Pretend that we will assign regions to these more lightly loaded servers // until they reach load equal with ours. Then, see how many regions are // left unassigned. That is how many regions we should assign to this server int nRegions = 0; for (Map.Entry<HServerLoad, Collection<String>> e : lightServers.entrySet()) { HServerLoad lightLoad = new HServerLoad(e.getKey()); do { lightLoad.setNumberOfRegions(lightLoad.getNumberOfRegions() + 1); nRegions += 1; } while (lightLoad.compareTo(thisServersLoad) <= 0 && nRegions < numUnassignedRegions); nRegions *= e.getValue().size(); if (nRegions >= numUnassignedRegions) { break; } } return nRegions; } /** * Get the set of regions that should be assignable in this pass. * * Note that no synchronization on regionsInTransition is needed because the * only caller (assignRegions, whose caller is ServerManager.processMsgs) owns * the monitor for RegionManager */ private Set<RegionState> regionsAwaitingAssignment(HServerInfo server) { // set of regions we want to assign to this server Set<RegionState> regionsToAssign = new HashSet<RegionState>(); boolean isSingleServer = isSingleRegionServer(); HServerAddress addr = server.getServerAddress(); boolean isMetaServer = isMetaServer(addr); RegionState rootState = null; boolean isPreferredAssignment = false; boolean reassigningMetas = (numberOfMetaRegions.get() > onlineMetaRegions.size()); boolean isMetaOrRoot = isMetaServer || isRootServer(addr); // Assign ROOT region if ROOT region is offline. synchronized (this.regionsInTransition) { rootState = regionsInTransition.get(HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString()); } if (rootState != null && rootState.isUnassigned()) { // just make sure it isn't hosting META regions (unless // it's the only server left). 
if (!isMetaServer || isSingleServer) { regionsToAssign.add(rootState); LOG.debug("Going to assign -ROOT- region to server " + server.getHostnamePort()); } return regionsToAssign; } // Don't assign META to this server who has already hosted any ROOT or META if (isMetaOrRoot && reassigningMetas && !isSingleServer) { return regionsToAssign; } // Get the set of the regions which are preserved // for the current region server Set<HRegionInfo> preservedRegionsForCurrentRS = assignmentManager.getTransientAssignments(addr); synchronized (this.regionsInTransition) { int nonPreferredAssignment = 0; for (RegionState regionState : regionsInTransition.values()) { HRegionInfo regionInfo = regionState.getRegionInfo(); if (regionInfo == null) continue; if (regionInfo.isRootRegion() && !regionState.isUnassigned()) { LOG.debug("The -ROOT- region" + " has been assigned and will be online soon. " + "Do nothing for server " + server.getHostnamePort()); break; } // Assign the META region here explicitly if (regionInfo.isMetaRegion()) { if (regionState.isUnassigned()) { regionsToAssign.clear(); regionsToAssign.add(regionState); LOG.debug("Going to assign META region: " + regionInfo.getRegionNameAsString() + " to server " + server.getHostnamePort()); } else { LOG.debug("The .META. region " + regionInfo.getRegionNameAsString() + " has been assigned and will be online soon. 
" + "Do nothing for server " + server.getHostnamePort()); } break; } // Can't assign user regions until all meta regions have been assigned, // the initial meta scan is done and there are enough online // region servers if (reassigningMetas || !master.getServerManager().hasEnoughRegionServers()) { LOG.debug("Cannot assign region " + regionInfo.getRegionNameAsString() + " because not all the META are online, " + "or the initial META scan is not completed, or there are no " + "enough online region servers"); continue; } // Cannot assign region which is NOT in the unassigned state if (!regionState.isUnassigned()) { continue; } if (preservedRegionsForCurrentRS == null || !preservedRegionsForCurrentRS.contains(regionInfo)) { if (assignmentManager.hasTransientAssignment(regionInfo) || nonPreferredAssignment > this.maxAssignInOneGo) { // Hold the region for its favored nodes and limit the number of // non preferred assignments for each region server. continue; } // This is a non preferred assignment. isPreferredAssignment = false; nonPreferredAssignment++; } else { isPreferredAssignment = true; } // Assign the current region to the region server. regionsToAssign.add(regionState); LOG.debug("Going to assign user region " + regionInfo.getRegionNameAsString() + " to server " + server.getHostnamePort() + " in a " + (isPreferredAssignment ? "" : "non-") + "preferred way"); } } return regionsToAssign; } /** * Get the set of regions that should be assignable in this pass. 
* * Note that no synchronization on regionsInTransition is needed because the * only caller (assignRegions, whose caller is ServerManager.processMsgs) owns * the monitor for RegionManager */ private Set<RegionState> regionsAwaitingAssignment(HServerAddress addr, boolean isSingleServer, MutableBoolean isPreferredAssignment, boolean assignmentByLocality, boolean holdRegionForBestRegionserver, Set<String> quickStartRegionServerSet) { // set of regions we want to assign to this server Set<RegionState> regionsToAssign = new HashSet<RegionState>(); Set<HRegionInfo> regions = assignmentManager.getTransientAssignments(addr); if (null != regions) { isPreferredAssignment.setValue(true); // One could use regionsInTransition.keySet().containsAll(regions) but // this provides more control and probably the same complexity. Also, this // gives direct logging of precise errors HRegionInfo[] regionInfo = regions.toArray(new HRegionInfo[regions.size()]); for (HRegionInfo ri : regionInfo) { RegionState state = regionsInTransition.get(ri.getRegionNameAsString()); if (null != state && state.isUnassigned()) { regionsToAssign.add(state); assignmentManager.removeTransientAssignment(addr, ri); } } StringBuilder regionNames = new StringBuilder(); regionNames.append("[ "); for (RegionState regionState : regionsToAssign) { regionNames.append(Bytes.toString(regionState.getRegionName())); regionNames.append(" , "); } regionNames.append(" ]"); LOG.debug("Assigning regions to " + addr + " : " + regionNames); // return its initial regions ASAP return regionsToAssign; } boolean isMetaServer = isMetaServer(addr); boolean isRootServer = isRootServer(addr); boolean isMetaOrRoot = isMetaServer || isRootServer; // lookup hostname of addr if needed String hostName = null; RegionState rootState = null; int nonPreferredAssignmentCount = 0; // Handle if root is unassigned... only assign root if root is offline. 
synchronized (this.regionsInTransition) { rootState = regionsInTransition.get(HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString()); } if (rootState != null && rootState.isUnassigned()) { // make sure root isnt assigned here first. // if so return 'empty list' // by definition there is no way this could be a ROOT region (since it's // unassigned) so just make sure it isn't hosting META regions (unless // it's the only server left). if (!isMetaServer || isSingleServer) { regionsToAssign.add(rootState); } return regionsToAssign; } // Look over the set of regions that aren't currently assigned to // determine which we should assign to this server. boolean reassigningMetas = numberOfMetaRegions.get() != onlineMetaRegions.size(); if (reassigningMetas && isMetaOrRoot && !isSingleServer) { return regionsToAssign; // dont assign anything to this server. } synchronized (this.regionsInTransition) { for (RegionState s : regionsInTransition.values()) { if (!s.isUnassigned()) { continue; } String regionName = s.getRegionInfo().getEncodedName(); String tableName = s.getRegionInfo().getTableDesc().getNameAsString(); String name = tableName + ":" + regionName; HRegionInfo i = s.getRegionInfo(); if (i == null) { continue; } if (reassigningMetas && !i.isMetaRegion()) { // Can't assign user regions until all meta regions have been assigned // and are on-line continue; } if (!i.isMetaRegion() && !master.getServerManager().hasEnoughRegionServers()) { LOG.debug("user region " + i.getRegionNameAsString() + " is in transition but not enough servers yet"); continue; } // if we are holding it, don't give it away to any other server if (assignmentManager.hasTransientAssignment(s.getRegionInfo())) { continue; } if (assignmentByLocality && !i.isRootRegion() && !i.isMetaRegion()) { Text preferredHostNameTxt = (Text) this.master.getPreferredRegionToRegionServerMapping() .get(new Text(name)); if (hostName == null) { hostName = addr.getHostname(); } if (preferredHostNameTxt != null) { String 
preferredHost = preferredHostNameTxt.toString(); if (hostName.startsWith(preferredHost)) { LOG.debug("Doing Preferred Region Assignment for : " + name + " to the " + hostName); // add the region to its preferred region server. regionsToAssign.add(s); continue; } else if (holdRegionForBestRegionserver || quickStartRegionServerSet.contains(preferredHost)) { continue; } } } // Only assign a configured number unassigned region at one time in the // non preferred assignment case. if ((nonPreferredAssignmentCount++) < this.maxAssignInOneGo) { regionsToAssign.add(s); } } } return regionsToAssign; } /* * Figure out the load that is next highest amongst all regionservers. Also, * return how many servers exist at that load. */ private int computeNextHeaviestLoad(HServerLoad referenceLoad, HServerLoad heavierLoad) { SortedMap<HServerLoad, Collection<String>> heavyServers = master.getServerManager().getServersToLoad() .getHeavyServers(referenceLoad); int nservers = 0; for (Map.Entry<HServerLoad, Collection<String>> e : heavyServers.entrySet()) { Collection<String> servers = e.getValue(); nservers += servers.size(); if (e.getKey().compareTo(referenceLoad) == 0) { // This is the load factor of the server we are considering nservers -= 1; continue; } // If we get here, we are at the first load entry that is a // heavier load than the server we are considering heavierLoad.setNumberOfRequests(e.getKey().getNumberOfRequests()); heavierLoad.setNumberOfRegions(e.getKey().getNumberOfRegions()); break; } return nservers; } /* * The server checking in right now is overloaded. We will tell it to close * some or all of its most loaded regions, allowing it to reduce its load. * The closed regions will then get picked up by other underloaded machines. 
* * Note that no synchronization is needed because the only caller * (assignRegions) whose caller owns the monitor for RegionManager */ void unassignSomeRegions(final HServerInfo info, int numRegionsToClose, final HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) { LOG.debug("Unassigning " + numRegionsToClose + " regions from " + info.getServerName()); int regionIdx = 0; int regionsClosed = 0; int skipped = 0; while (regionsClosed < numRegionsToClose && regionIdx < mostLoadedRegions.length) { HRegionInfo currentRegion = mostLoadedRegions[regionIdx]; regionIdx++; // skip the region if it's meta or root if (currentRegion.isRootRegion() || currentRegion.isMetaTable()) { continue; } final String regionName = currentRegion.getRegionNameAsString(); if (regionIsInTransition(regionName)) { skipped++; continue; } if (LOG.isDebugEnabled()) { LOG.debug("Going to close region " + regionName); } // make a message to close the region returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, currentRegion, OVERLOADED)); // mark the region as closing setClosing(info.getServerName(), currentRegion, false); setPendingClose(regionName); // increment the count of regions we've marked regionsClosed++; } LOG.info("Skipped assigning " + skipped + " region(s) to " + info.getServerName() + " because already in transition"); } /* * PathFilter that accepts hbase tables only. */ static class TableDirFilter implements PathFilter { @Override public boolean accept(final Path path) { // skip the region servers' log dirs && version file // HBASE-1112 want to separate the log dirs from table's data dirs by a // special character. final String pathname = path.getName(); return (!pathname.equals(HConstants.HREGION_LOGDIR_NAME) && !pathname.equals(HConstants.VERSION_FILE_NAME)); } } /* * PathFilter that accepts all but compaction.dir names. 
*/
static class RegionDirFilter implements PathFilter {
  @Override
  public boolean accept(Path path) {
    final String dirName = path.getName();
    return !dirName.equals(HConstants.HREGION_COMPACTIONDIR_NAME);
  }
}

/**
 * @return Read-only map of online regions.
 */
public Map<byte[], MetaRegion> getOnlineMetaRegions() {
  synchronized (onlineMetaRegions) {
    final Map<byte[], MetaRegion> readOnlyView =
        Collections.unmodifiableMap(onlineMetaRegions);
    return readOnlyView;
  }
}

/**
 * @return true if at least one online meta region is currently in transition
 */
public boolean metaRegionsInTransition() {
  synchronized (onlineMetaRegions) {
    for (MetaRegion meta : onlineMetaRegions.values()) {
      if (regionIsInTransition(Bytes.toString(meta.getRegionName()))) {
        return true;
      }
    }
    return false;
  }
}

/**
 * Collect the regions in transition whose state names the given server.
 * Returned map entries are region name -> RegionState
 */
Map<String, RegionState> getRegionsInTransitionOnServer(String serverName) {
  final Map<String, RegionState> onServer = new HashMap<String, RegionState>();
  synchronized (regionsInTransition) {
    for (Map.Entry<String, RegionState> transition : regionsInTransition.entrySet()) {
      final RegionState state = transition.getValue();
      if (!serverName.equals(state.getServerName())) {
        continue;
      }
      onServer.put(transition.getKey(), state);
    }
  }
  return onServer;
}

/**
 * Stop the root and meta scanners so that the region servers serving meta regions can shut down.
 * Not thread-safe, but if called twice from the same thread, scanners will only be stopped once.
 */
public void stopScanners() {
  if (stoppedScanners) {
    // Already stopped by an earlier call.
    return;
  }
  this.rootScannerThread.interruptAndStop();
  this.metaScannerThread.interruptAndStop();
  stoppedScanners = true;
}

/**
 * Force the rootScannerThread, and the metaScannerThread to scan the
 * root/meta region at once.
 *
 * These threads are supposed to scan ROOT/META regions every so often
 * (typically 1 min). This function is used to force the scan to happen
 * immediately.
*/ public void forceScans() { if (!stoppedScanners) { LOG.debug("Going to trigger a metaScan for -ROOT-"); this.rootScannerThread.triggerNow(); LOG.debug("Going to trigger a metaScan for .META."); this.metaScannerThread.triggerNow(); } } /** * Terminate all threads but don't clean up any state. */ public void joinThreads() { try { if (rootScannerThread.isAlive()) { rootScannerThread.join(); // Wait for the root scanner to finish. } } catch (InterruptedException iex) { LOG.warn("root scanner", iex); } try { if (metaScannerThread.isAlive()) { metaScannerThread.join(); // Wait for meta scanner to finish. } } catch (InterruptedException iex) { LOG.warn("meta scanner", iex); } } /** * Block until meta regions are online or we're shutting down. * @return true if we found meta regions, false if we're closing. */ public boolean areAllMetaRegionsOnline() { synchronized (onlineMetaRegions) { return (rootRegionLocation.get() != null && numberOfMetaRegions.get() <= onlineMetaRegions.size()); } } /** * Search our map of online meta regions to find the first meta region that * should contain a pointer to <i>newRegion</i>. * @param newRegion * @return MetaRegion where the newRegion should live */ public MetaRegion getFirstMetaRegionForRegion(HRegionInfo newRegion) { synchronized (onlineMetaRegions) { return getMetaRegionPointingTo(onlineMetaRegions, newRegion); } } static MetaRegion getMetaRegionPointingTo(NavigableMap<byte[], MetaRegion> metaRegions, HRegionInfo newRegion) { if (metaRegions.isEmpty()) { return null; } else if (metaRegions.size() == 1) { return metaRegions.get(metaRegions.firstKey()); } else { if (metaRegions.containsKey(newRegion.getRegionName())) { return metaRegions.get(newRegion.getRegionName()); } return metaRegions.get(metaRegions.headMap(newRegion.getRegionName()).lastKey()); } } /** * Get a set of all the meta regions that contain info about a given table. 
* @param tableName Table you need to know all the meta regions for * @return set of MetaRegion objects that contain the table * @throws NotAllMetaRegionsOnlineException */ public Set<MetaRegion> getMetaRegionsForTable(byte[] tableName) throws NotAllMetaRegionsOnlineException { byte[] firstMetaRegion = null; Set<MetaRegion> metaRegions = new HashSet<MetaRegion>(); if (Bytes.equals(tableName, HConstants.META_TABLE_NAME)) { if (rootRegionLocation.get() == null) { throw new NotAllMetaRegionsOnlineException(Bytes.toString(HConstants.ROOT_TABLE_NAME)); } metaRegions .add(new MetaRegion(rootRegionLocation.get().getServerAddress(), HRegionInfo.ROOT_REGIONINFO)); } else { if (!areAllMetaRegionsOnline()) { throw new NotAllMetaRegionsOnlineException(); } synchronized (onlineMetaRegions) { if (onlineMetaRegions.size() == 1) { firstMetaRegion = onlineMetaRegions.firstKey(); } else if (onlineMetaRegions.containsKey(tableName)) { firstMetaRegion = tableName; } else { firstMetaRegion = onlineMetaRegions.headMap(tableName).lastKey(); } metaRegions.addAll(onlineMetaRegions.tailMap(firstMetaRegion).values()); } } return metaRegions; } /** * Get metaregion that would host passed in row. * @param row Row need to know all the meta regions for * @return MetaRegion for passed row. * @throws NotAllMetaRegionsOnlineException */ public MetaRegion getMetaRegionForRow(final byte[] row) throws NotAllMetaRegionsOnlineException { if (!areAllMetaRegionsOnline()) { throw new NotAllMetaRegionsOnlineException(); } // Row might be in -ROOT- table. If so, return -ROOT- region. 
int prefixlen = META_REGION_PREFIX.length; if (row.length > prefixlen && Bytes.compareTo(META_REGION_PREFIX, 0, prefixlen, row, 0, prefixlen) == 0) { return new MetaRegion(this.master.getRegionManager().getRootRegionLocation(), HRegionInfo.ROOT_REGIONINFO); } return this.onlineMetaRegions.floorEntry(row).getValue(); } /** * Create a new HRegion, put a row for it into META (or ROOT), and mark the * new region unassigned so that it will get assigned to a region server. * @param newRegion HRegionInfo for the region to create * @param server server hosting the META (or ROOT) region where the new * region needs to be noted * @param metaRegionName name of the meta region where new region is to be * written * @throws IOException */ public void createRegion(HRegionInfo newRegion, HRegionInterface server, byte[] metaRegionName) throws IOException { createRegion(newRegion, server, metaRegionName, null); } /** * Create a new HRegion, put a row for it into META (or ROOT), and mark the * new region unassigned so that it will get assigned to a region server. * @param newRegion HRegionInfo for the region to create * @param server server hosting the META (or ROOT) region where the new * region needs to be noted * @param metaRegionName name of the meta region where new region is to be * @param favoriteNodeList The list of favorite nodes for this new region. * written * @throws IOException */ public void createRegion(HRegionInfo newRegion, HRegionInterface server, byte[] metaRegionName, List<HServerAddress> favoriteNodeList) throws IOException { // 2. Create the HRegion HRegion region = HRegion.createHRegion(newRegion, this.master.getRootDir(), master.getConfiguration()); // 3. Insert into meta HRegionInfo info = region.getRegionInfo(); byte[] regionName = region.getRegionName(); Put put = new Put(regionName); // 3.1 Put the region info into meta table. 
put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, Writables.getBytes(info)); // 3.2 Put the favorite nodes into meta. if (favoriteNodeList != null) { String favoredNodes = RegionPlacement.getFavoredNodes(favoriteNodeList); put.add(HConstants.CATALOG_FAMILY, HConstants.FAVOREDNODES_QUALIFIER, EnvironmentEdgeManager.currentTimeMillis(), favoredNodes.getBytes()); LOG.info("Create the region " + info.getRegionNameAsString() + " with favored nodes " + favoredNodes); } server.put(metaRegionName, put); // 4. Close the new region to flush it to disk. Close its log file too. region.close(); region.getLog().closeAndDelete(); // After all regions are created, the caller will schedule // the meta scanner to run immediately and assign out the // regions. } /** * Set a MetaRegion as online. * @param metaRegion */ public void putMetaRegionOnline(MetaRegion metaRegion) { onlineMetaRegions.put(metaRegion.getStartKey(), metaRegion); } /** * Get a list of online MetaRegions * @return list of MetaRegion objects */ public List<MetaRegion> getListOfOnlineMetaRegions() { List<MetaRegion> regions; synchronized (onlineMetaRegions) { regions = new ArrayList<MetaRegion>(onlineMetaRegions.values()); } return regions; } /** * Count of online meta regions * @return count of online meta regions */ public int numOnlineMetaRegions() { return onlineMetaRegions.size(); } /** * Check if a meta region is online by its name * @param startKey name of the meta region to check * @return true if the region is online, false otherwise */ public boolean isMetaRegionOnline(byte[] startKey) { return onlineMetaRegions.containsKey(startKey); } /** * Set an online MetaRegion offline - remove it from the map. * @param startKey Startkey to use finding region to remove. * @return the MetaRegion that was taken offline. 
*/ public MetaRegion offlineMetaRegionWithStartKey(byte[] startKey) { LOG.info("META region whose startkey is " + Bytes.toString(startKey) + " removed from onlineMetaRegions"); return onlineMetaRegions.remove(startKey); } public boolean isRootServer(HServerAddress server) { return this.master.getRegionManager().getRootRegionLocation() != null && server.equals(master.getRegionManager().getRootRegionLocation()); } /** * Returns the list of byte[] start-keys for any .META. regions hosted * on the indicated server. * * @param server server address * @return list of meta region start-keys. */ public List<byte[]> listMetaRegionsForServer(HServerAddress server) { List<byte[]> metas = new ArrayList<byte[]>(); for (MetaRegion region : onlineMetaRegions.values()) { if (server.equals(region.getServer())) { metas.add(region.getStartKey()); } } return metas; } /** * Does this server have any META regions open on it, or any meta * regions being assigned to it? * * @param server Server IP:port * @return true if server has meta region assigned */ public boolean isMetaServer(HServerAddress server) { for (MetaRegion region : onlineMetaRegions.values()) { if (server.equals(region.getServer())) { return true; } } // This might be expensive, but we need to make sure we dont // get double assignment to the same regionserver. synchronized (regionsInTransition) { for (RegionState s : regionsInTransition.values()) { if (s.getRegionInfo().isMetaRegion() && !s.isUnassigned() && s.getServerName() != null && s.getServerName().equals(server.toString())) { // TODO this code appears to be entirely broken, since // server.toString() has no start code, but s.getServerName() // does! LOG.fatal("I DONT BELIEVE YOU WILL EVER SEE THIS!"); // Has an outstanding meta region to be assigned. return true; } } } return false; } /** * Is this server assigned to transition the ROOT table. 
HBASE-1928 * * @param server Server * @return true if server is transitioning the ROOT table */ public boolean isRootInTransitionOnThisServer(final String server) { synchronized (this.regionsInTransition) { for (RegionState s : regionsInTransition.values()) { if (s.getRegionInfo().isRootRegion() && !s.isUnassigned() && s.getServerName() != null && s.getServerName().equals(server)) { // Has an outstanding root region to be assigned. return true; } } } return false; } /** * Is this server assigned to transition a META table. HBASE-1928 * * @param server Server * @return if this server was transitioning a META table then a not null HRegionInfo pointing to it */ public HRegionInfo getMetaServerRegionInfo(final String server) { synchronized (this.regionsInTransition) { for (RegionState s : regionsInTransition.values()) { if (s.getRegionInfo().isMetaRegion() && !s.isUnassigned() && s.getServerName() != null && s.getServerName().equals(server)) { // Has an outstanding meta region to be assigned. return s.getRegionInfo(); } } } return null; } /** * Call to take this metaserver offline for immediate reassignment. Used only * when we know a region has shut down cleanly. * * A meta server is a server that hosts either -ROOT- or any .META. regions. * * If you are considering a unclean shutdown potentially, use ProcessServerShutdown which * calls other methods to immediately unassign root/meta but delay the reassign until the * log has been split. * * @param server the server that went down * @return true if this was in fact a meta server, false if it did not carry meta regions. */ public synchronized boolean offlineMetaServer(HServerAddress server) { boolean hasMeta = false; // check to see if ROOT and/or .META. are on this server, reassign them. // use master.getRootRegionLocation. 
if (master.getRegionManager().getRootRegionLocation() != null && server.equals(master.getRegionManager().getRootRegionLocation())) { LOG.info("Offlined ROOT server: " + server); reassignRootRegion(); hasMeta = true; } // AND for (MetaRegion region : onlineMetaRegions.values()) { if (server.equals(region.getServer())) { LOG.info("Offlining META region: " + region); offlineMetaRegionWithStartKey(region.getStartKey()); // Set for reassignment. setUnassigned(region.getRegionInfo(), true); hasMeta = true; } } return hasMeta; } /** * Remove a region from the region state map. * * @param info */ public void removeRegion(HRegionInfo info) { synchronized (this.regionsInTransition) { this.regionsInTransition.remove(info.getRegionNameAsString()); zkWrapper.deleteUnassignedRegion(info.getEncodedName()); } } /** * @param regionName * @return true if the named region is in a transition state */ public boolean regionIsInTransition(String regionName) { synchronized (this.regionsInTransition) { return regionsInTransition.containsKey(regionName); } } /** * @param regionName * @return true if the region is unassigned, pendingOpen or open */ public boolean regionIsOpening(String regionName) { synchronized (this.regionsInTransition) { RegionState state = regionsInTransition.get(regionName); if (state != null) { return state.isOpening(); } } return false; } /** * Set a region to unassigned. Always writes the region's unassigned znode. 
* @param info Region to set unassigned * @param force if true mark region unassigned whatever its current state */ void setUnassigned(HRegionInfo info, boolean force) { setUnassignedGeneral(true, info, force); } void setUnassignedGeneral(boolean writeToZK, HRegionInfo info, boolean force) { RegionState s = null; long t0, t1, t2, t3; t0 = System.currentTimeMillis(); synchronized (this.regionsInTransition) { t1 = System.currentTimeMillis(); s = regionsInTransition.get(info.getRegionNameAsString()); if (s == null) { byte[] data = null; try { data = Writables.getBytes( new RegionTransitionEventData(HBaseEventType.M2ZK_REGION_OFFLINE, HMaster.MASTER)); } catch (IOException e) { // TODO: Review what we should do here. If Writables work this // should never happen LOG.error("Error creating event data for " + HBaseEventType.M2ZK_REGION_OFFLINE, e); } if (writeToZK) { zkWrapper.createOrUpdateUnassignedRegion(info.getEncodedName(), data); LOG.debug("Created/updated UNASSIGNED zNode " + info.getRegionNameAsString() + " in state " + HBaseEventType.M2ZK_REGION_OFFLINE); } s = new RegionState(info, RegionState.State.UNASSIGNED); regionsInTransition.put(info.getRegionNameAsString(), s); } t2 = System.currentTimeMillis(); if (force || (!s.isPendingOpen() && !s.isOpen())) { // Refresh assignment information when a region is marked unassigned so // that it opens on the preferred server. this.assignmentManager.executeAssignmentPlan(info); s.setUnassigned(); } t3 = System.currentTimeMillis(); } if (LOG.isDebugEnabled()) LOG.debug("Took " + this.toString() + " " + (t3 - t0) + " ms. for RegionManager.setUnassigned " + (t1 - t0) + " ms. to get the lock. " + (t2 - t1) + " ms. to update regionsInTransition. " + (t3 - t2) + " ms. to executeAssignmentPlan. "); } /** * Check if a region is on the unassigned list * @param info HRegionInfo to check for * @return true if on the unassigned list, false if it isn't. 
* Note that this
 * means a region could not be on the unassigned list AND not be assigned, if
 * it happens to be between states.
 */
public boolean isUnassigned(HRegionInfo info) {
  synchronized (regionsInTransition) {
    final RegionState state = regionsInTransition.get(info.getRegionNameAsString());
    return state == null ? false : state.isUnassigned();
  }
}

/**
 * Check if a region has been assigned and we're waiting for a response from
 * the region server.
 *
 * @param regionName name of the region
 * @return true if open, false otherwise
 */
public boolean isPendingOpen(String regionName) {
  synchronized (regionsInTransition) {
    final RegionState state = regionsInTransition.get(regionName);
    return state == null ? false : state.isPendingOpen();
  }
}

/**
 * Region has been assigned to a server and the server has told us it is open
 * @param regionName
 */
public void setOpen(String regionName) {
  Preconditions.checkNotNull(regionName);
  synchronized (regionsInTransition) {
    final RegionState state = regionsInTransition.get(regionName);
    if (state == null) {
      // Not in transition: nothing to record.
      return;
    }
    state.setOpen();
    this.master.getMetrics().incRegionsOpened();
    if (state.serverName != null) {
      // Remember where the region opened for later duplicate-assignment checks.
      this.regionLocationHintToDetectDupAssignment.put(regionName, state.serverName);
    }
  }
}

/**
 * Check if the region was last opened at the particular server
 * @param regionName
 * @param serverName
 * @return true if regionName was last opened at serverName
 */
public boolean lastOpenedAt(String regionName, String serverName) {
  final String lastServer = this.regionLocationHintToDetectDupAssignment.get(regionName);
  if (lastServer == null) {
    return false;
  }
  return lastServer.equals(serverName);
}

/**
 * @param regionName
 * @return true if region is marked to be offlined.
*/ public boolean isOfflined(String regionName) { synchronized (regionsInTransition) { RegionState s = regionsInTransition.get(regionName); if (s != null) { return s.isOfflined(); } } return false; } /** * Mark a region as closing * @param serverName * @param regionInfo * @param setOffline */ public void setClosing(String serverName, final HRegionInfo regionInfo, final boolean setOffline) { synchronized (this.regionsInTransition) { RegionState s = this.regionsInTransition.get(regionInfo.getRegionNameAsString()); if (s == null) { s = new RegionState(regionInfo, RegionState.State.CLOSING); } // If region was asked to open before getting here, we could be taking // the wrong server name if (s.isPendingOpen()) { serverName = s.getServerName(); } s.setClosing(serverName, setOffline); this.regionsInTransition.put(regionInfo.getRegionNameAsString(), s); if (!setOffline) { // Refresh assignment information when a region is closed and not // marked offline so that it opens on the preferred server. 
this.assignmentManager.executeAssignmentPlan(regionInfo); } } } /** * Remove the map of region names to region infos waiting to be offlined for a * given server * * @param serverName * @return set of infos to close */ public Set<HRegionInfo> getMarkedToClose(String serverName) { Set<HRegionInfo> result = new HashSet<HRegionInfo>(); synchronized (regionsInTransition) { for (RegionState s : regionsInTransition.values()) { if (s.isClosing() && !s.isPendingClose() && !s.isClosed() && s.getServerName().compareTo(serverName) == 0) { result.add(s.getRegionInfo()); } } } return result; } /** * Called when we have told a region server to close the region * * @param regionName */ public void setPendingClose(String regionName) { synchronized (regionsInTransition) { RegionState s = regionsInTransition.get(regionName); if (s != null) { s.setPendingClose(); } } } /** * @param regionName */ public void setClosed(String regionName) { synchronized (regionsInTransition) { RegionState s = regionsInTransition.get(regionName); if (s != null) { s.setClosed(); } } } /** * Get the root region location. * @return HServerAddress describing root region server. */ public HServerAddress getRootRegionLocation() { return HServerInfo.getAddress(rootRegionLocation.get()); } /** Returns root region location as a server info object (with a start code) */ public HServerInfo getRootServerInfo() { return rootRegionLocation.get(); } /** * Block until either the root region location is available or we're shutting * down. */ public void waitForRootRegionLocation() { synchronized (rootRegionLocation) { while (!master.isStopped() && rootRegionLocation.get() == null) { // rootRegionLocation will be filled in when we get an 'open region' // regionServerReport message from the HRegionServer that has been // allocated the ROOT region below. try { // Cycle rather than hold here in case master is closed meantime. 
rootRegionLocation.wait(this.master.getThreadWakeFrequency()); } catch (InterruptedException e) { LOG.error("Interrupted when waiting for root region location"); continue; } } } } /** * Return the number of meta regions. * @return number of meta regions */ public int numMetaRegions() { return numberOfMetaRegions.get(); } /** * Bump the count of meta regions up one */ public void incrementNumMetaRegions() { numberOfMetaRegions.incrementAndGet(); } private long getPauseTime(int tries) { int attempt = tries; if (attempt >= HConstants.RETRY_BACKOFF.length) { attempt = HConstants.RETRY_BACKOFF.length - 1; } return this.zooKeeperPause * HConstants.RETRY_BACKOFF[attempt]; } private void sleep(int attempt) { try { Thread.sleep(getPauseTime(attempt)); } catch (InterruptedException e) { // continue } } private void writeRootRegionLocationToZooKeeper(HServerInfo hsi) { for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) { if (master.getZooKeeperWrapper().writeRootRegionLocation(hsi)) { return; } sleep(attempt); } LOG.error("Failed to write root region location to ZooKeeper after " + zooKeeperNumRetries + " retries, shutting down the cluster"); this.master.requestClusterShutdown(); } /** * Set the root region location. * @param address Address of the region server where the root lives */ public void setRootRegionLocation(HServerInfo hsi) { writeRootRegionLocationToZooKeeper(hsi); synchronized (rootRegionLocation) { // the root region has been assigned, remove it from transition in ZK zkWrapper.deleteUnassignedRegion(HRegionInfo.ROOT_REGIONINFO.getEncodedName()); rootRegionLocation.set(new HServerInfo(hsi)); rootRegionLocation.notifyAll(); } } /** * Set the number of meta regions. * @param num Number of meta regions */ public void setNumMetaRegions(int num) { numberOfMetaRegions.set(num); } /** * Starts an action that is specific to a column family. 
* @param regionName * @param columnFamily * @param info * @param server * @param op */ public void startCFAction(byte[] regionName, byte[] columnFamily, HRegionInfo info, HServerAddress server, HConstants.Modify op) { if (LOG.isDebugEnabled()) { LOG.debug("Adding operation " + op + " for column family : " + new String(columnFamily) + " from tasklist"); } switch (op) { case TABLE_COMPACT: startCFAction(regionName, columnFamily, info, server, this.cfsToCompact); break; case TABLE_MAJOR_COMPACT: startCFAction(regionName, columnFamily, info, server, this.cfsToMajorCompact); break; default: throw new IllegalArgumentException("illegal table action " + op); } } private void startCFAction(final byte[] regionName, final byte[] columnFamily, final HRegionInfo info, final HServerAddress server, final SortedMap<byte[], SortedMap<byte[], Pair<HRegionInfo, HServerAddress>>> map) { synchronized (map) { SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> cfMap = map.get(regionName); if (cfMap == null) { cfMap = Collections.synchronizedSortedMap( new TreeMap<byte[], Pair<HRegionInfo, HServerAddress>>(Bytes.BYTES_COMPARATOR)); } cfMap.put(columnFamily, new Pair<HRegionInfo, HServerAddress>(info, server)); map.put(regionName, cfMap); } } /** * @param regionName * @param info * @param server * @param op */ public void startAction(byte[] regionName, HRegionInfo info, HServerAddress server, HConstants.Modify op) { if (LOG.isDebugEnabled()) { LOG.debug("Adding operation " + op + " from tasklist"); } switch (op) { case TABLE_SPLIT: startAction(regionName, info, server, this.regionsToSplit); break; case TABLE_COMPACT: startAction(regionName, info, server, this.regionsToCompact); break; case TABLE_MAJOR_COMPACT: startAction(regionName, info, server, this.regionsToMajorCompact); break; case TABLE_FLUSH: startAction(regionName, info, server, this.regionsToFlush); break; default: throw new IllegalArgumentException("illegal table action " + op); } } private void startAction(final byte[] 
regionName, final HRegionInfo info, final HServerAddress server, final SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> map) { map.put(regionName, new Pair<HRegionInfo, HServerAddress>(info, server)); } /** * @param regionName */ public void endActions(byte[] regionName) { regionsToSplit.remove(regionName); regionsToCompact.remove(regionName); cfsToCompact.remove(regionName); cfsToMajorCompact.remove(regionName); } /** * Send messages to the given region server asking it to split any * regions in 'regionsToSplit', etc. * @param serverInfo * @param returnMsgs */ public void applyActions(HServerInfo serverInfo, ArrayList<HMsg> returnMsgs) { applyActions(serverInfo, returnMsgs, this.regionsToCompact, HMsg.Type.MSG_REGION_COMPACT); applyActions(serverInfo, returnMsgs, this.regionsToSplit, HMsg.Type.MSG_REGION_SPLIT); applyActions(serverInfo, returnMsgs, this.regionsToFlush, HMsg.Type.MSG_REGION_FLUSH); applyActions(serverInfo, returnMsgs, this.regionsToMajorCompact, HMsg.Type.MSG_REGION_MAJOR_COMPACT); // CF specific actions for a region. applyCFActions(serverInfo, returnMsgs, this.cfsToCompact, HMsg.Type.MSG_REGION_CF_COMPACT); applyCFActions(serverInfo, returnMsgs, this.cfsToMajorCompact, HMsg.Type.MSG_REGION_CF_MAJOR_COMPACT); } private void applyActions(final HServerInfo serverInfo, final ArrayList<HMsg> returnMsgs, final SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> map, final HMsg.Type msg) { HServerAddress addr = serverInfo.getServerAddress(); synchronized (map) { Iterator<Pair<HRegionInfo, HServerAddress>> i = map.values().iterator(); while (i.hasNext()) { Pair<HRegionInfo, HServerAddress> pair = i.next(); if (addr.equals(pair.getSecond())) { if (LOG.isDebugEnabled()) { LOG.debug("Sending " + msg + " " + pair.getFirst() + " to " + addr); } returnMsgs.add(new HMsg(msg, pair.getFirst())); i.remove(); } } } } /** * Applies actions specific to a column family within a region. 
*/ private void applyCFActions(final HServerInfo serverInfo, final ArrayList<HMsg> returnMsgs, final SortedMap<byte[], SortedMap<byte[], Pair<HRegionInfo, HServerAddress>>> map, final HMsg.Type msg) { HServerAddress addr = serverInfo.getServerAddress(); synchronized (map) { Iterator<SortedMap<byte[], Pair<HRegionInfo, HServerAddress>>> it1 = map.values().iterator(); while (it1.hasNext()) { SortedMap<byte[], Pair<HRegionInfo, HServerAddress>> cfMap = it1.next(); Iterator<Map.Entry<byte[], Pair<HRegionInfo, HServerAddress>>> it2 = cfMap.entrySet().iterator(); while (it2.hasNext()) { Map.Entry mapPairs = it2.next(); Pair<HRegionInfo, HServerAddress> pair = (Pair<HRegionInfo, HServerAddress>) mapPairs .getValue(); if (addr.equals(pair.getSecond())) { byte[] columnFamily = (byte[]) mapPairs.getKey(); if (LOG.isDebugEnabled()) { LOG.debug("Sending " + msg + " " + pair.getFirst() + " to " + addr + " for column family : " + new String(columnFamily)); } returnMsgs.add(new HMsg(msg, pair.getFirst(), columnFamily)); it2.remove(); } } if (cfMap.isEmpty()) { // If entire map is empty, remove it from the parent map. it1.remove(); } } } } /** * Classes which implement LoadBalancer are used to balance regions across * servers. They operate by unassigning some regions from a server so that * those regions can be assigned to other servers. */ abstract class LoadBalancer { // The maximum number of regions to close on one server during one iteration // of load balancing. // -1 or 0 to turn off // TODO: change default in HBASE-862, need a suggestion protected final int maxRegToClose; // hbase.regions.close.max protected final float slop; // hbase.regions.slop LoadBalancer() { Configuration conf = master.getConfiguration(); float confSlop = conf.getFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 0.3); this.slop = confSlop <= 0 ? 
1 : confSlop; this.maxRegToClose = conf.getInt("hbase.regions.close.max", -1); } /** * Balance regions across servers by unassigning some regions from the * specified server if they could be served elsewhere with better load * distribution. * @param info the server whose regions are being balanced * @param mostLoadedRegions the regions to consider for balancing * @param returnMsgs any regions to be unassigned will be added here */ public abstract void loadBalancing(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs); } /** * Class to balance regions according to preferred assignments. Regions which * are not on their preferred host but could be will be unassigned from their * current host and assigned to their preferred host. This behavior will also * consider secondary and tertiary preferred hosts if the primary is dead. */ class AssignmentLoadBalancer extends LoadBalancer { AssignmentLoadBalancer() { super(); } /** * Unassign some regions if there is a server with a higher preference, or * if a server with equal preference has a lower load. * @param info the server from which to unassign regions * @param mostLoadedRegions the candidate regions for moving * @param returnMsgs region close messages to be passed to the server */ public void loadBalancing(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) { int regionsUnassigned = balanceToPrimary(info, mostLoadedRegions, returnMsgs); if (regionsUnassigned <= 0) { regionsUnassigned = balanceFromUnfavored(info, mostLoadedRegions, returnMsgs); } if (regionsUnassigned <= 0) { regionsUnassigned = balanceSecondaries(info, mostLoadedRegions, returnMsgs); } } /** * If the primary assignment of any region hosted by the server {@code info} * is not that server, but the primary assignment server is alive, move that * region to the primary assignment server. 
* @param info the server whose regions to balance * @param mostLoadedRegions the regions to balance * @param returnMsgs region close messages to be passed to the server * @return the number of regions that were unassigned */ private int balanceToPrimary(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) { int regionsUnassigned = 0; // If for any of these regions, this server is not the primary but the // primary is alive, unassign that region and let it move to the primary. for (HRegionInfo region : mostLoadedRegions) { List<HServerAddress> preferences = assignmentManager.getAssignmentFromPlan(region); if (preferences == null || preferences.size() == 0) { // No prefered assignment, do nothing. continue; } else if (info.getServerAddress().equals(preferences.get(0))) { // This server is the primary, do nothing. continue; } else { if (getLoadIfAlive(preferences.get(0)) == null) { // Primary server is not alive, try next region. continue; } // Primary server is alive, unassign this region. if (unassignRegion(info, region, returnMsgs)) { regionsUnassigned++; if (regionsUnassigned >= maxRegToClose && maxRegToClose > 0) { LOG.debug("Unassigned " + region.getRegionNameAsString() + " from the server " + info.getHostnamePort() + " because the primary server: " + preferences.get(0) + " is a live"); return regionsUnassigned; } } } } return regionsUnassigned; } /** * If for any of the regions hosted by server {@code info}, that server is * not a favored node and one of the favored nodes for that region is alive, * move that region to a favored node. 
* @param info the server whose regions to balance * @param mostLoadedRegions the regions to balance * @param returnMsgs region close messages to be passed to the server * @return the number of regions that were unassigned */ private int balanceFromUnfavored(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) { int regionsUnassigned = 0; for (HRegionInfo region : mostLoadedRegions) { List<HServerAddress> preferences = assignmentManager.getAssignmentFromPlan(region); if (preferences == null || preferences.size() == 0) { // No preferredAssignment, do nothing. continue; } else if (preferences.contains(info.getServerAddress())) { // This server already a favored node, do nothing. continue; } int leastLoad = Integer.MAX_VALUE; HServerAddress leastLoadedSecondary = null; // This server is not a favored node for the current region. Check if // one of the secondary servers is alive, and determine which of those // has the least load. for (int i = 1; i < preferences.size(); i++) { HServerLoad secondaryLoad = getLoadIfAlive(preferences.get(i)); if (secondaryLoad != null && secondaryLoad.getNumberOfRegions() < leastLoad) { leastLoad = secondaryLoad.getNumberOfRegions(); leastLoadedSecondary = preferences.get(i); } } if (leastLoadedSecondary != null) { // Move the region if the current server is not a preferred assignment // for that region. 
if (unassignRegion(info, region, returnMsgs)) { regionsUnassigned++; if (regionsUnassigned >= maxRegToClose && maxRegToClose > 0) { LOG.debug("Unassigned " + region.getRegionNameAsString() + " from the unfavoraed server " + info.getHostnamePort() + " because one least loaded secondary server: " + leastLoadedSecondary + " is a live"); return regionsUnassigned; } } } } return regionsUnassigned; } /** * For any of the regions hosted by server {@code info}, if that server is * currently hosted by an overloaded secondary node and another secondary * node is underloaded, move the region from the overloaded node to the * underloaded one. * @param info the server whose regions to balance * @param mostLoadedRegions the regions to balance * @param returnMsgs region close messages to be passed to the server * @return the number of regions that were unassigned */ private int balanceSecondaries(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) { int regionsUnassigned = 0; double avgLoad = master.getAverageLoad(); int avgLoadMinusSlop = (int) Math.floor(avgLoad * (1 - this.slop)) - 1; int avgLoadPlusSlop = (int) Math.ceil(avgLoad * (1 + this.slop)); int serverLoad = master.getServerManager().getServersToLoad().get(info.getServerName()) .getNumberOfRegions(); for (HRegionInfo region : mostLoadedRegions) { List<HServerAddress> preferences = assignmentManager.getAssignmentFromPlan(region); if (preferences == null || preferences.size() == 0) { // No preferredAssignment, do nothing. continue; } else if (info.getServerAddress().equals(preferences.get(0))) { // This server is the primary, do nothing. continue; } // This server is not the primary for the current region. Check if // another favored node has lower load and move the region there if so. for (int i = 1; i < preferences.size(); i++) { if (preferences.get(i).equals(info.getServerAddress())) { // Same server as currently hosting region, try next one. 
continue; } HServerLoad otherLoad = getLoadIfAlive(preferences.get(i)); if (otherLoad == null) { // Other server is not alive, try next one. continue; } // Only move the region if the other server is under-loaded and the // current server is overloaded. if (serverLoad - regionsUnassigned > avgLoadPlusSlop && otherLoad.getNumberOfRegions() < avgLoadMinusSlop) { if (unassignRegion(info, region, returnMsgs)) { // Need to override transient assignment that may have been added // for the region to its current server when unassigning. assignmentManager.removeTransientAssignment(info.getServerAddress(), region); assignmentManager.addTransientAssignment(preferences.get(i), region); regionsUnassigned++; if (regionsUnassigned >= maxRegToClose && maxRegToClose > 0) { LOG.debug("Unassigned " + region.getRegionNameAsString() + " from the overloaded secondary server: " + info.getHostnamePort() + " because another low loaded secondary server: " + preferences.get(i) + " is a live"); return regionsUnassigned; } } } } } return regionsUnassigned; } /** * Get the load for the region server at the address {@code server} unless * that server does not exist, is dead, or is shutting down. In cases where * the load cannot be retrieved, return null. * @param server the address of the server whose load to get * @return the load for the server or null */ private HServerLoad getLoadIfAlive(HServerAddress server) { HServerInfo other = master.getServerManager().getHServerInfo(server); if (other == null || master.getServerManager().isDeadProcessingPending(other.getServerName())) { return null; } return master.getServerManager().getServersToLoad().get(other.getServerName()); } /** * Unassign a certain region from a certain server, unless that region is * already in transition. A region close message will be added tot he list * of return messages. 
* @param info the server on which to close the region * @param region the region to be unassigned * @param returnMsgs a region close message will be added here * @return true if the region was unassigned */ private boolean unassignRegion(HServerInfo info, HRegionInfo region, ArrayList<HMsg> returnMsgs) { if (region.isRootRegion() || region.isMetaTable()) { return false; } final String regionName = region.getRegionNameAsString(); if (regionIsInTransition(regionName)) { // Region may have already been unassigned, abort this operation. return false; } if (LOG.isDebugEnabled()) { LOG.debug("AssignmentLoadBalancer going to close region " + regionName); } // Make a message to close the region returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, region, OVERLOADED)); // Mark the region as closing setClosing(info.getServerName(), region, false); setPendingClose(regionName); return true; } } /** * Class to balance region servers load. * It keeps Region Servers load in slop range by unassigning Regions * from most loaded servers. * * Equilibrium is reached when load of all serves are in slop range * [avgLoadMinusSlop, avgLoadPlusSlop], where * avgLoadPlusSlop = Math.ceil(avgLoad * (1 + this.slop)), and * avgLoadMinusSlop = Math.floor(avgLoad * (1 - this.slop)) - 1. */ class DefaultLoadBalancer extends LoadBalancer { DefaultLoadBalancer() { super(); } /** * Balance server load by unassigning some regions. 
* * @param info - server info * @param mostLoadedRegions - array of most loaded regions * @param returnMsgs - array of return massages */ public void loadBalancing(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) { HServerLoad servLoad = info.getLoad(); double avg = master.getAverageLoad(); // nothing to balance if server load not more then average load if (servLoad.getLoad() <= Math.floor(avg) || avg <= 2.0) { return; } // check if current server is overloaded int numRegionsToClose = balanceFromOverloaded(info.getServerName(), servLoad, avg); // check if we can unload server by low loaded servers if (numRegionsToClose <= 0) { numRegionsToClose = balanceToLowloaded(info.getServerName(), servLoad, avg); } if (maxRegToClose > 0) { numRegionsToClose = Math.min(numRegionsToClose, maxRegToClose); } if (numRegionsToClose > 0) { unassignSomeRegions(info, numRegionsToClose, mostLoadedRegions, returnMsgs); } } /* * Check if server load is not overloaded (with load > avgLoadPlusSlop). * @return number of regions to unassign. */ private int balanceFromOverloaded(final String serverName, HServerLoad srvLoad, double avgLoad) { int avgLoadPlusSlop = (int) Math.ceil(avgLoad * (1 + this.slop)); int numSrvRegs = srvLoad.getNumberOfRegions(); if (numSrvRegs > avgLoadPlusSlop) { if (LOG.isDebugEnabled()) { LOG.debug("Server " + serverName + " is carrying more than its fair " + "share of regions: " + "load=" + numSrvRegs + ", avg=" + avgLoad + ", slop=" + this.slop); } return numSrvRegs - (int) Math.ceil(avgLoad); } return 0; } /* * Check if server is most loaded and can be unloaded to * low loaded servers (with load < avgLoadMinusSlop). * @return number of regions to unassign. 
*/ private int balanceToLowloaded(String srvName, HServerLoad srvLoad, double avgLoad) { ServerLoadMap<HServerLoad> serverLoadMap = master.getServerManager().getServersToLoad(); if (!serverLoadMap.isMostLoadedServer(srvName)) return 0; // this server is most loaded, we will try to unload it by lowest // loaded servers int avgLoadMinusSlop = (int) Math.floor(avgLoad * (1 - this.slop)) - 1; HServerLoad lowestServerLoad = serverLoadMap.getLowestLoad(); int lowestLoad = lowestServerLoad.getNumberOfRegions(); if (lowestLoad >= avgLoadMinusSlop) return 0; // there is no low loaded servers int lowSrvCount = serverLoadMap.numServersByLoad(lowestServerLoad); int numSrvRegs = srvLoad.getNumberOfRegions(); int numMoveToLowLoaded = (avgLoadMinusSlop - lowestLoad) * lowSrvCount; int numRegionsToClose = numSrvRegs - (int) Math.floor(avgLoad); numRegionsToClose = Math.min(numRegionsToClose, numMoveToLowLoaded); if (LOG.isDebugEnabled()) { LOG.debug("Server(s) are carrying only " + lowestLoad + " regions. " + "Server " + srvName + " is most loaded (" + numSrvRegs + "). Shedding " + numRegionsToClose + " regions to pass to " + " least loaded (numMoveToLowLoaded=" + numMoveToLowLoaded + ")"); } return numRegionsToClose; } } /** * @return Snapshot of regionsintransition as a sorted Map. */ NavigableMap<String, String> getRegionsInTransition() { NavigableMap<String, String> result = new TreeMap<String, String>(); synchronized (this.regionsInTransition) { if (this.regionsInTransition.isEmpty()) return result; for (Map.Entry<String, RegionState> e : this.regionsInTransition.entrySet()) { result.put(e.getKey(), e.getValue().toString()); } } return result; } /** * @param regionname Name to clear from regions in transistion. * @return True if we removed an element for the passed regionname. 
*/ boolean clearFromInTransition(final byte[] regionname) { boolean result = false; synchronized (this.regionsInTransition) { if (this.regionsInTransition.isEmpty()) return result; for (Map.Entry<String, RegionState> e : this.regionsInTransition.entrySet()) { if (Bytes.equals(regionname, e.getValue().getRegionName())) { this.regionsInTransition.remove(e.getKey()); LOG.debug("Removed " + e.getKey() + ", " + e.getValue()); result = true; break; } } } return result; } /* * State of a Region as it transitions from closed to open, etc. See * note on regionsInTransition data member above for listing of state * transitions. */ static class RegionState implements Comparable<RegionState> { private final HRegionInfo regionInfo; enum State { UNASSIGNED, // awaiting a server to be assigned PENDING_OPEN, // told a server to open, hasn't opened yet OPEN, // has been opened on RS, but not yet marked in META/ROOT CLOSING, // a msg has been enqueued to close ths region, but not delivered to RS yet PENDING_CLOSE, // msg has been delivered to RS to close this region CLOSED // region has been closed but not yet marked in meta } private State state; private boolean isOfflined; /* Set when region is assigned or closing */ private String serverName = null; /* Constructor */ RegionState(HRegionInfo info, State state) { this.regionInfo = info; this.state = state; } synchronized HRegionInfo getRegionInfo() { return this.regionInfo; } synchronized byte[] getRegionName() { return this.regionInfo.getRegionName(); } /* * @return Server this region was assigned to */ synchronized String getServerName() { return this.serverName; } /* * @return true if the region is being opened */ synchronized boolean isOpening() { return state == State.UNASSIGNED || state == State.PENDING_OPEN || state == State.OPEN; } /* * @return true if region is unassigned */ synchronized boolean isUnassigned() { return state == State.UNASSIGNED; } /* * Note: callers of this method (reassignRootRegion, * 
regionsAwaitingAssignment, setUnassigned) ensure that this method is not * called unless it is safe to do so. */ synchronized void setUnassigned() { state = State.UNASSIGNED; this.serverName = null; } synchronized boolean isPendingOpen() { return state == State.PENDING_OPEN; } /* * @param serverName Server region was assigned to. */ synchronized void setPendingOpen(final String serverName) { if (state != State.UNASSIGNED) { LOG.warn( "Cannot assign a region that is not currently unassigned. " + "FIX!! State: " + toString()); } state = State.PENDING_OPEN; this.serverName = serverName; } synchronized boolean isOpen() { return state == State.OPEN; } synchronized void setOpen() { if (state != State.PENDING_OPEN) { LOG.warn("Cannot set a region as open if it has not been pending. " + "FIX!! State: " + toString()); } state = State.OPEN; } synchronized boolean isClosing() { return state == State.CLOSING; } synchronized void setClosing(String serverName, boolean setOffline) { state = State.CLOSING; this.serverName = serverName; this.isOfflined = setOffline; } synchronized boolean isPendingClose() { return state == State.PENDING_CLOSE; } synchronized void setPendingClose() { if (state != State.CLOSING) { LOG.warn("Cannot set a region as pending close if it has not been " + "closing. FIX!! State: " + toString()); } state = State.PENDING_CLOSE; } synchronized boolean isClosed() { return state == State.CLOSED; } synchronized void setClosed() { if (state != State.PENDING_CLOSE && state != State.PENDING_OPEN && state != State.CLOSING) { throw new IllegalStateException("Cannot set a region to be closed if it was not already marked as" + " pending close, pending open or closing. 
State: " + this); } state = State.CLOSED; } synchronized boolean isOfflined() { return (state == State.CLOSING || state == State.PENDING_CLOSE) && isOfflined; } @Override public synchronized String toString() { return ("name=" + Bytes.toString(getRegionName()) + ", state=" + this.state); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } return this.compareTo((RegionState) o) == 0; } @Override public int hashCode() { return Bytes.toString(getRegionName()).hashCode(); } @Override public int compareTo(RegionState o) { if (o == null) { return 1; } return Bytes.compareTo(getRegionName(), o.getRegionName()); } } /** * Method used to do housekeeping for holding regions for a RegionServer going * down for a restart * * @param regionServer * the RegionServer going down for a restart * @param regions * the HRegions it was previously serving */ public void addRegionServerForRestart(final HServerInfo regionServer, Set<HRegionInfo> regions) { LOG.debug("Holding regions of restartng server: " + regionServer.getServerName()); HServerAddress addr = regionServer.getServerAddress(); for (HRegionInfo region : regions) { assignmentManager.addTransientAssignment(addr, region); } } /** * Create a reopener for this table, if one exists, return the existing throttler. 
* @param tableName * @return */ public ThrottledRegionReopener createThrottledReopener(String tableName) { if (!tablesReopeningRegions.containsKey(tableName)) { ThrottledRegionReopener throttledReopener = new ThrottledRegionReopener(tableName, this.master, this); tablesReopeningRegions.put(tableName, throttledReopener); } return tablesReopeningRegions.get(tableName); } /** * Return the throttler for this table * @param tableName * @return */ public ThrottledRegionReopener getThrottledReopener(String tableName) { return tablesReopeningRegions.get(tableName); } /** * Delete the throttler when the operation is complete * @param tableName */ public void deleteThrottledReopener(String tableName) { // if tablesReopeningRegions.contains do something if (tablesReopeningRegions.containsKey(tableName)) { tablesReopeningRegions.remove(tableName); LOG.debug("Removed throttler for " + tableName); } else { LOG.debug("Tried to delete a throttled reopener, but it does not exist."); } } /** * When the region is opened, check if it is reopening and notify the throttler * for further processing. * @param region */ public void notifyRegionReopened(HRegionInfo region) { String tableName = region.getTableDesc().getNameAsString(); if (tablesReopeningRegions.containsKey(tableName)) { tablesReopeningRegions.get(tableName).notifyRegionOpened(region); } } MetaScanner getMetaScanner() { return metaScannerThread; } /** * Composes a map of .META. region locations for both online .META. regions and regions that * we know are assigned to regionservers, but have not been scanned yet. This is used on master * startup to write pending region location changes from the ZK unassigned directory to .META. 
*/ NavigableMap<byte[], MetaRegion> getAllMetaRegionLocations() { NavigableMap<byte[], MetaRegion> m = new TreeMap<byte[], MetaRegion>(Bytes.BYTES_COMPARATOR); m.putAll(metaRegionLocationsBeforeScan); m.putAll(onlineMetaRegions); return m; } /** * Modifies region state in regionsInTransition based on the initial scan of the ZK unassigned * directory. * @param event event type written by the regionserver to the znode * @param regionInfo region info * @param serverName regionserver name */ void setRegionStateOnRecovery(HBaseEventType event, HRegionInfo regionInfo, String serverName) { String regionName = regionInfo.getRegionNameAsString(); String stateStr = null; if (event == HBaseEventType.RS2ZK_REGION_CLOSING || event == HBaseEventType.RS2ZK_REGION_CLOSED) { synchronized (regionsInTransition) { RegionState s = regionsInTransition.get(regionName); if (s == null) { s = new RegionState(regionInfo, RegionState.State.PENDING_CLOSE); regionsInTransition.put(regionName, s); } else { s.setClosing(serverName, s.isOfflined()); s.setPendingClose(); } stateStr = s.toString(); } } if (event == HBaseEventType.RS2ZK_REGION_OPENED || event == HBaseEventType.RS2ZK_REGION_OPENING) { synchronized (regionsInTransition) { RegionState s = regionsInTransition.get(regionName); if (s == null) { s = new RegionState(regionInfo, RegionState.State.PENDING_OPEN); regionsInTransition.put(regionName, s); } else { s.setUnassigned(); s.setPendingOpen(serverName); } stateStr = s.toString(); } } if (stateStr != null) { LOG.info("Set state in regionsInTransition: " + stateStr); } } /** Recovers root region location from ZK. Should only be called on master startup. */ void recoverRootRegionLocationFromZK() { HServerInfo rootLocationInZK = zkWrapper.readRootRegionServerInfo(); if (rootLocationInZK != null) { synchronized (rootRegionLocation) { rootRegionLocation.set(rootLocationInZK); rootRegionLocation.notifyAll(); } } } }