List of usage examples for java.util.NavigableMap.entrySet()
Set<Map.Entry<K, V>> entrySet();
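Before the source-file examples below, here is a minimal, self-contained sketch of the basic pattern: entrySet() returns a Set<Map.Entry<K, V>> view that iterates in the map's key order and is backed by the map. The class and variable names are illustrative only.

import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

public class EntrySetDemo {
    public static void main(String[] args) {
        NavigableMap<String, Integer> map = new TreeMap<>();
        map.put("b", 2);
        map.put("a", 1);
        map.put("c", 3);

        // Iterates in ascending key order: a=1, b=2, c=3
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.println(entry.getKey() + "=" + entry.getValue());
        }

        // The set is a live view: removing through its iterator removes from the map,
        // and the descending view exposes the same entries in reverse key order.
        System.out.println(map.descendingMap().entrySet()); // [c=3, b=2, a=1]
    }
}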
From source file: org.apache.hadoop.hbase.master.handler.ServerShutdownHandler.java
@Override public void process() throws IOException { boolean hasLogReplayWork = false; final ServerName serverName = this.serverName; try {/* w w w .ja v a 2s . c om*/ // We don't want worker thread in the MetaServerShutdownHandler // executor pool to block by waiting availability of hbase:meta // Otherwise, it could run into the following issue: // 1. The current MetaServerShutdownHandler instance For RS1 waits for the hbase:meta // to come online. // 2. The newly assigned hbase:meta region server RS2 was shutdown right after // it opens the hbase:meta region. So the MetaServerShutdownHandler // instance For RS1 will still be blocked. // 3. The new instance of MetaServerShutdownHandler for RS2 is queued. // 4. The newly assigned hbase:meta region server RS3 was shutdown right after // it opens the hbase:meta region. So the MetaServerShutdownHandler // instance For RS1 and RS2 will still be blocked. // 5. The new instance of MetaServerShutdownHandler for RS3 is queued. // 6. Repeat until we run out of MetaServerShutdownHandler worker threads // The solution here is to resubmit a ServerShutdownHandler request to process // user regions on that server so that MetaServerShutdownHandler // executor pool is always available. // // If AssignmentManager hasn't finished rebuilding user regions, // we are not ready to assign dead regions either. So we re-queue up // the dead server for further processing too. AssignmentManager am = services.getAssignmentManager(); if (isCarryingMeta() // hbase:meta || !am.isFailoverCleanupDone()) { this.services.getServerManager().processDeadServer(serverName, this.shouldSplitHlog); return; } // Wait on meta to come online; we need it to progress. // TODO: Best way to hold strictly here? We should build this retry logic // into the MetaReader operations themselves. // TODO: Is the reading of hbase:meta necessary when the Master has state of // cluster in its head? It should be possible to do without reading hbase:meta // in all but one case. On split, the RS updates the hbase:meta // table and THEN informs the master of the split via zk nodes in // 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if // the regionserver dies, these nodes do not stick around and this server // shutdown processing does fixup (see the fixupDaughters method below). // If we wanted to skip the hbase:meta scan, we'd have to change at least the // final SPLIT message to be permanent in zk so in here we'd know a SPLIT // completed (zk is updated after edits to hbase:meta have gone in). See // {@link SplitTransaction}. We'd also have to be figure another way for // doing the below hbase:meta daughters fixup. NavigableMap<HRegionInfo, Result> hris = null; while (!this.server.isStopped()) { try { this.server.getCatalogTracker().waitForMeta(); // Skip getting user regions if the server is stopped. 
if (!this.server.isStopped()) { hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(), this.serverName); } break; } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw (InterruptedIOException) new InterruptedIOException().initCause(e); } catch (IOException ioe) { LOG.info("Received exception accessing hbase:meta during server shutdown of " + serverName + ", retrying hbase:meta read", ioe); } } if (this.server.isStopped()) { throw new IOException("Server is stopped"); } try { if (this.shouldSplitHlog) { LOG.info("Splitting logs for " + serverName + " before assignment."); if (this.distributedLogReplay) { LOG.info("Mark regions in recovery before assignment."); Set<ServerName> serverNames = new HashSet<ServerName>(); serverNames.add(serverName); this.services.getMasterFileSystem().prepareLogReplay(serverNames); } else { this.services.getMasterFileSystem().splitLog(serverName); } am.getRegionStates().logSplit(serverName); } else { LOG.info("Skipping log splitting for " + serverName); } } catch (IOException ioe) { resubmit(serverName, ioe); } // Clean out anything in regions in transition. Being conservative and // doing after log splitting. Could do some states before -- OPENING? // OFFLINE? -- and then others after like CLOSING that depend on log // splitting. List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName); LOG.info("Reassigning " + ((hris == null) ? 0 : hris.size()) + " region(s) that " + (serverName == null ? "null" : serverName) + " was carrying (and " + regionsInTransition.size() + " regions(s) that were opening on this server)"); List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>(); toAssignRegions.addAll(regionsInTransition); // Iterate regions that were on this server and assign them if (hris != null) { RegionStates regionStates = am.getRegionStates(); for (Map.Entry<HRegionInfo, Result> e : hris.entrySet()) { HRegionInfo hri = e.getKey(); if (regionsInTransition.contains(hri)) { continue; } String encodedName = hri.getEncodedName(); Lock lock = am.acquireRegionLock(encodedName); try { RegionState rit = regionStates.getRegionTransitionState(hri); if (processDeadRegion(hri, e.getValue(), am, server.getCatalogTracker())) { ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri); if (addressFromAM != null && !addressFromAM.equals(this.serverName)) { // If this region is in transition on the dead server, it must be // opening or pending_open, which should have been covered by AM#processServerShutdown LOG.info("Skip assigning region " + hri.getRegionNameAsString() + " because it has been opened in " + addressFromAM.getServerName()); continue; } if (rit != null) { if (rit.getServerName() != null && !rit.isOnServer(serverName)) { // Skip regions that are in transition on other server LOG.info("Skip assigning region in transition on other server" + rit); continue; } try { //clean zk node LOG.info("Reassigning region with rs = " + rit + " and deleting zk node if exists"); ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), hri); regionStates.updateRegionState(hri, State.OFFLINE); } catch (KeeperException ke) { this.server.abort("Unexpected ZK exception deleting unassigned node " + hri, ke); return; } } else if (regionStates.isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) { regionStates.regionOffline(hri); } toAssignRegions.add(hri); } else if (rit != null) { if (rit.isPendingCloseOrClosing() && am.getTableStateManager().isTableState( hri.getTable(), 
ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING)) { // If the table was partially disabled and the RS went down, we should clear the RIT // and remove the node for the region. // The rit that we use may be stale in case the table was in DISABLING state // but though we did assign we will not be clearing the znode in CLOSING state. // Doing this will have no harm. See HBASE-5927 regionStates.updateRegionState(hri, State.OFFLINE); am.deleteClosingOrClosedNode(hri, rit.getServerName()); am.offlineDisabledRegion(hri); } else { LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition " + rit + " not to be assigned by SSH of server " + serverName); } } } finally { lock.unlock(); } } } try { am.assign(toAssignRegions); } catch (InterruptedException ie) { LOG.error("Caught " + ie + " during round-robin assignment"); throw (InterruptedIOException) new InterruptedIOException().initCause(ie); } if (this.shouldSplitHlog && this.distributedLogReplay) { // wait for region assignment completes for (HRegionInfo hri : toAssignRegions) { try { if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) { // Wait here is to avoid log replay hits current dead server and incur a RPC timeout // when replay happens before region assignment completes. LOG.warn("Region " + hri.getEncodedName() + " didn't complete assignment in time"); } } catch (InterruptedException ie) { throw new InterruptedIOException( "Caught " + ie + " during waitOnRegionToClearRegionsInTransition"); } } // submit logReplay work this.services.getExecutorService().submit( new LogReplayHandler(this.server, this.services, this.deadServers, this.serverName)); hasLogReplayWork = true; } } finally { this.deadServers.finish(serverName); } if (!hasLogReplayWork) { LOG.info("Finished processing of shutdown of " + serverName); } }
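The NavigableMap usage buried in the handler above is the loop over hris.entrySet(): each entry pairs a region (the key) with the hbase:meta row that was read for it (the value), and regions already in transition are skipped with continue. The following stripped-down sketch shows only that selection pattern; the generic types stand in for the HBase classes and the helper name is made up for illustration.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;

class ReassignSketch {
    // Collect keys whose entries should be processed, skipping ones already handled elsewhere.
    static <K, V> List<K> selectForAssignment(NavigableMap<K, V> regions, List<K> alreadyInTransition) {
        List<K> toAssign = new ArrayList<>(alreadyInTransition);
        for (Map.Entry<K, V> e : regions.entrySet()) {
            K region = e.getKey();
            if (alreadyInTransition.contains(region)) {
                continue; // mirrors the skip in process() above
            }
            // In the real handler, e.getValue() (the meta Result for the region)
            // is inspected before the region is queued for assignment.
            toAssign.add(region);
        }
        return toAssign;
    }
}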
From source file: com.google.gwt.emultest.java.util.TreeMapTest.java
@SuppressWarnings("SuspiciousMethodCalls")
public void testEntrySet_contains() {
    K[] keys = getSortedKeys();
    V[] values = getSortedValues();

    NavigableMap<K, V> master = createNavigableMap();
    NavigableMap<K, V> testMap = createNavigableMap();

    master.put(keys[0], null);
    Object[] entry = master.entrySet().toArray();
    assertFalse(testMap.entrySet().contains(entry[0]));

    Map<K, V> submap = testMap.subMap(keys[2], keys[3]);
    entry = master.entrySet().toArray();
    assertFalse(submap.entrySet().contains(entry[0]));

    testMap.put(keys[0], null);
    assertTrue(testMap.entrySet().containsAll(master.entrySet()));

    master.clear();
    master.put(keys[0], values[0]);
    entry = master.entrySet().toArray();
    assertFalse(testMap.entrySet().contains(entry[0]));
}
From source file: com.google.gwt.emultest.java.util.TreeMapTest.java
public void testEntrySet() {
    K[] keys = getSortedKeys();
    V[] values = getSortedValues();

    NavigableMap<K, V> map = createNavigableMap();
    map.put(keys[0], values[0]);
    map.put(keys[1], values[1]);
    map.put(keys[2], values[2]);

    Set<Map.Entry<K, V>> entries = map.entrySet();
    Iterator<Map.Entry<K, V>> entrySetIterator = entries.iterator();
    assertEquals(3, entries.size());
    assertEquals(keys[0] + "=" + values[0], entrySetIterator.next().toString());
    while (entrySetIterator.hasNext()) {
        Map.Entry<K, V> entry = entrySetIterator.next();
        assertTrue(map.get(entry.getKey()) == entry.getValue());
    }

    assertEquals(map.size(), entries.size());
    _assertEquals(entries, map.entrySet());

    map.clear();
    assertEquals(map.size(), entries.size());
    _assertEquals(entries, map.entrySet());

    map.put(keys[0], values[0]);
    assertEquals(map.size(), entries.size());
    _assertEquals(entries, map.entrySet());

    entries.clear();
    assertEquals(map.size(), entries.size());
    _assertEquals(entries, map.entrySet());

    map.put(keys[1], values[1]);
    map.put(keys[2], values[2]);

    Iterator<Entry<K, V>> it = entries.iterator();
    while (it.hasNext()) {
        Map.Entry<K, V> entry = it.next();
        map.containsKey(entry.getKey());
        map.containsValue(entry.getValue());
        it.remove();
    }
    try {
        it.next();
        fail("should throw NoSuchElementException");
    } catch (NoSuchElementException expected) {
    }
    _assertEmpty(map);
}
From source file: org.commonvox.hbase_column_manager.TestRepositoryAdmin.java
private void verifyColumnData(Configuration configuration, boolean useDetailedScan) throws IOException {
    try (Connection connection = MConnectionFactory.createConnection(configuration)) {
        for (TableName tableName : testTableNamesAndDescriptors.keySet()) {
            List<Result> rows;
            Scan scan = new Scan().setMaxVersions();
            if (useDetailedScan) {
                // scan.addFamily(CF01);
                // scan.addFamily(CF02);
                scan.addColumn(CF01, COLQUALIFIER01);
                scan.addColumn(CF01, COLQUALIFIER02);
                scan.addColumn(CF01, COLQUALIFIER03);
                scan.addColumn(CF02, COLQUALIFIER04);
                scan.addColumn(CF01, QUALIFIER_IN_EXCLUDED_TABLE);
                scan.addColumn(CF02, QUALIFIER_IN_EXCLUDED_TABLE);
            }
            rows = getUserTableRows(connection, tableName, scan);
            System.out.println("CONTENTS of user Table: " + tableName.getNameAsString()
                    + " retrieved with " + (useDetailedScan ? "DETAILED" : "EMPTY") + " Scan parms");
            for (Result row : rows) {
                System.out.println(" **ROW-ID**: " + Bytes.toString(row.getRow()));
                NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> contentMap = row.getMap();
                for (Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> familyMap : contentMap.entrySet()) {
                    System.out.println(" -- Column Family: " + Bytes.toString(familyMap.getKey()));
                    for (Entry<byte[], NavigableMap<Long, byte[]>> columnMap : familyMap.getValue().entrySet()) {
                        if (Repository.isPrintable(columnMap.getKey())) {
                            System.out.println(" -- Column: " + Bytes.toString(columnMap.getKey()));
                        } else {
                            try {
                                System.out.println(" -- Column (ALIAS): " + Bytes.toInt(columnMap.getKey()));
                            } catch (IllegalArgumentException e) {
                                System.out.println(" -- Column name UNPRINTABLE (neither String nor int)!!");
                            }
                        }
                        for (Entry<Long, byte[]> cellMap : columnMap.getValue().entrySet()) {
                            // System.out.println(" -- Cell Timestamp: " + cellMap.getKey().toString());
                            System.out.println(" -- Cell Value: " + Bytes.toString(cellMap.getValue()));
                        }
                    }
                }
            }
        }
    }
}
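The essential NavigableMap work in the test above is the traversal of Result.getMap(), which returns the row as three nested NavigableMaps: column family to qualifier to timestamp to value. A condensed sketch of just that traversal, assuming the HBase client classes (Result, Bytes) are on the classpath with the usual java.util imports; the method name is illustrative.

static void dumpRow(Result row) {
    NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> contentMap = row.getMap();
    for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> family : contentMap.entrySet()) {
        String familyName = Bytes.toString(family.getKey());
        for (Map.Entry<byte[], NavigableMap<Long, byte[]>> column : family.getValue().entrySet()) {
            String qualifier = Bytes.toString(column.getKey());
            for (Map.Entry<Long, byte[]> version : column.getValue().entrySet()) {
                // family:qualifier @timestamp = value
                System.out.println(familyName + ":" + qualifier + " @" + version.getKey()
                        + " = " + Bytes.toString(version.getValue()));
            }
        }
    }
}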
From source file: com.google.gwt.emultest.java.util.TreeMapTest.java
public void testDescendingMap() {
    K[] keys = getSortedKeys();
    V[] values = getSortedValues();

    NavigableMap<K, V> map = createNavigableMap();
    map.put(keys[0], values[0]);
    NavigableMap<K, V> descendingMap = map.descendingMap();
    _assertEquals(descendingMap, map.descendingMap());

    map.put(keys[1], values[1]);
    _assertEquals(map, descendingMap.descendingMap());
    _assertEquals(reverseCollection(map.entrySet()), descendingMap.entrySet());

    descendingMap.put(keys[2], values[2]);
    _assertEquals(reverseCollection(map.entrySet()), descendingMap.entrySet());
    _assertEquals(map.entrySet(), descendingMap.descendingMap().entrySet());

    descendingMap.remove(keys[1]);
    _assertEquals(reverseCollection(map.entrySet()), descendingMap.entrySet());

    descendingMap.clear();
    assertEquals(0, descendingMap.size());
    assertEquals(0, map.size());

    map.put(keys[0], values[0]);
    map.put(keys[1], values[1]);
    map.put(keys[2], values[2]);
    assertEquals(3, descendingMap.size());

    NavigableMap<K, V> headMap = descendingMap.headMap(keys[1], false);
    assertEquals(1, headMap.size());
    assertTrue(headMap.containsKey(keys[2]));

    NavigableMap<K, V> subMap = descendingMap.subMap(keys[2], true, keys[1], true);
    assertEquals(2, subMap.size());
    assertTrue(subMap.containsKey(keys[1]));
    assertTrue(subMap.containsKey(keys[2]));

    NavigableMap<K, V> tailMap = descendingMap.tailMap(keys[1], false);
    assertEquals(1, tailMap.size());
    assertTrue(tailMap.containsKey(keys[0]));
}
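What the test above exercises is that descendingMap() is a live, backed view: puts and removes through either map are visible from both, and entrySet() on the descending view yields the same entries in reverse key order. A tiny standalone illustration with a TreeMap (keys and values are arbitrary):

import java.util.NavigableMap;
import java.util.TreeMap;

public class DescendingEntrySetDemo {
    public static void main(String[] args) {
        NavigableMap<Integer, String> map = new TreeMap<>();
        map.put(1, "one");
        map.put(2, "two");

        NavigableMap<Integer, String> desc = map.descendingMap();
        desc.put(3, "three"); // writes through to the backing map

        System.out.println(map.entrySet());  // [1=one, 2=two, 3=three]
        System.out.println(desc.entrySet()); // [3=three, 2=two, 1=one]
    }
}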
From source file: org.apache.hadoop.hbase.master.balancer.DefaultLoadBalancer.java
/** * Generate a global load balancing plan according to the specified map of * server information to the most loaded regions of each server. * * The load balancing invariant is that all servers are within 1 region of the * average number of regions per server. If the average is an integer number, * all servers will be balanced to the average. Otherwise, all servers will * have either floor(average) or ceiling(average) regions. * * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that * we can fetch from both ends of the queue. * At the beginning, we check whether there was empty region server * just discovered by Master. If so, we alternately choose new / old * regions from head / tail of regionsToMove, respectively. This alternation * avoids clustering young regions on the newly discovered region server. * Otherwise, we choose new regions from head of regionsToMove. * //from w ww.j a v a 2s . co m * Another improvement from HBASE-3609 is that we assign regions from * regionsToMove to underloaded servers in round-robin fashion. * Previously one underloaded server would be filled before we move onto * the next underloaded server, leading to clustering of young regions. * * Finally, we randomly shuffle underloaded servers so that they receive * offloaded regions relatively evenly across calls to balanceCluster(). * * The algorithm is currently implemented as such: * * <ol> * <li>Determine the two valid numbers of regions each server should have, * <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average). * * <li>Iterate down the most loaded servers, shedding regions from each so * each server hosts exactly <b>MAX</b> regions. Stop once you reach a * server that already has <= <b>MAX</b> regions. * <p> * Order the regions to move from most recent to least. * * <li>Iterate down the least loaded servers, assigning regions so each server * has exactly </b>MIN</b> regions. Stop once you reach a server that * already has >= <b>MIN</b> regions. * * Regions being assigned to underloaded servers are those that were shed * in the previous step. It is possible that there were not enough * regions shed to fill each underloaded server to <b>MIN</b>. If so we * end up with a number of regions required to do so, <b>neededRegions</b>. * * It is also possible that we were able to fill each underloaded but ended * up with regions that were unassigned from overloaded servers but that * still do not have assignment. * * If neither of these conditions hold (no regions needed to fill the * underloaded servers, no regions leftover from overloaded servers), * we are done and return. Otherwise we handle these cases below. * * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers), * we iterate the most loaded servers again, shedding a single server from * each (this brings them from having <b>MAX</b> regions to having * <b>MIN</b> regions). * * <li>We now definitely have more regions that need assignment, either from * the previous step or from the original shedding from overloaded servers. * Iterate the least loaded servers filling each to <b>MIN</b>. * * <li>If we still have more regions that need assignment, again iterate the * least loaded servers, this time giving each one (filling them to * </b>MAX</b>) until we run out. * * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions. * * In addition, any server hosting >= <b>MAX</b> regions is guaranteed * to end up with <b>MAX</b> regions at the end of the balancing. 
This * ensures the minimal number of regions possible are moved. * </ol> * * TODO: We can at-most reassign the number of regions away from a particular * server to be how many they report as most loaded. * Should we just keep all assignment in memory? Any objections? * Does this mean we need HeapSize on HMaster? Or just careful monitor? * (current thinking is we will hold all assignments in memory) * * @param clusterMap Map of regionservers and their load/region information to * a list of their most loaded regions * @return a list of regions to be moved, including source and destination, * or null if cluster is already balanced */ public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) { boolean emptyRegionServerPresent = false; long startTime = System.currentTimeMillis(); ClusterLoadState cs = new ClusterLoadState(clusterMap); if (!this.needsBalance(cs)) return null; int numServers = cs.getNumServers(); NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad(); int numRegions = cs.getNumRegions(); int min = numRegions / numServers; int max = numRegions % numServers == 0 ? min : min + 1; // Using to check balance result. StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=") .append(numServers).append(", max=").append(max).append(", min=").append(min); LOG.debug(strBalanceParam.toString()); // Balance the cluster // TODO: Look at data block locality or a more complex load to do this MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create(); List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>(); // Walk down most loaded, pruning each to the max int serversOverloaded = 0; // flag used to fetch regions from head and tail of list, alternately boolean fetchFromTail = false; Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>(); for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) { ServerAndLoad sal = server.getKey(); int regionCount = sal.getLoad(); if (regionCount <= max) { serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0)); break; } serversOverloaded++; List<HRegionInfo> regions = server.getValue(); int numToOffload = Math.min(regionCount - max, regions.size()); // account for the out-of-band regions which were assigned to this server // after some other region server crashed Collections.sort(regions, riComparator); int numTaken = 0; for (int i = 0; i <= numToOffload;) { HRegionInfo hri = regions.get(i); // fetch from head if (fetchFromTail) { hri = regions.get(regions.size() - 1 - i); } i++; // Don't rebalance meta regions. 
if (hri.isMetaRegion()) continue; regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null)); numTaken++; if (numTaken >= numToOffload) break; // fetch in alternate order if there is new region server if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } } serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken)); } int totalNumMoved = regionsToMove.size(); // Walk down least loaded, filling each to the min int neededRegions = 0; // number of regions needed to bring all up to min fetchFromTail = false; Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>(); float average = (float) numRegions / numServers; // for logging int maxToTake = numRegions - (int) average; for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { if (maxToTake == 0) break; // no more to take int regionCount = server.getKey().getLoad(); if (regionCount >= min && regionCount > 0) { continue; // look for other servers which haven't reached min } int regionsToPut = min - regionCount; if (regionsToPut == 0) { regionsToPut = 1; maxToTake--; } underloadedServers.put(server.getKey().getServerName(), regionsToPut); } // number of servers that get new regions int serversUnderloaded = underloadedServers.size(); int incr = 1; List<ServerName> sns = Arrays .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded])); Collections.shuffle(sns, RANDOM); while (regionsToMove.size() > 0) { int cnt = 0; int i = incr > 0 ? 0 : underloadedServers.size() - 1; for (; i >= 0 && i < underloadedServers.size(); i += incr) { if (regionsToMove.isEmpty()) break; ServerName si = sns.get(i); int numToTake = underloadedServers.get(si); if (numToTake == 0) continue; addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } underloadedServers.put(si, numToTake - 1); cnt++; BalanceInfo bi = serverBalanceInfo.get(si); if (bi == null) { bi = new BalanceInfo(0, 0); serverBalanceInfo.put(si, bi); } bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1); } if (cnt == 0) break; // iterates underloadedServers in the other direction incr = -incr; } for (Integer i : underloadedServers.values()) { // If we still want to take some, increment needed neededRegions += i; } // If none needed to fill all to min and none left to drain all to max, // we are done if (neededRegions == 0 && regionsToMove.isEmpty()) { long endTime = System.currentTimeMillis(); LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return regionsToReturn; } // Need to do a second pass. // Either more regions to assign out or servers that are still underloaded // If we need more to fill min, grab one from each most loaded until enough if (neededRegions != 0) { // Walk down most loaded, grabbing one from each until we get enough for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) { BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload(); if (idx >= server.getValue().size()) break; HRegionInfo region = server.getValue().get(idx); if (region.isMetaRegion()) continue; // Don't move meta regions. 
regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null)); totalNumMoved++; if (--neededRegions == 0) { // No more regions needed, done shedding break; } } } // Now we have a set of regions that must be all assigned out // Assign each underloaded up to the min, then if leftovers, assign to max // Walk down least loaded, assigning to each to fill up to min for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad(); if (regionCount >= min) break; BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); if (balanceInfo != null) { regionCount += balanceInfo.getNumRegionsAdded(); } if (regionCount >= min) { continue; } int numToTake = min - regionCount; int numTaken = 0; while (numTaken < numToTake && 0 < regionsToMove.size()) { addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn); numTaken++; if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } } } // If we still have regions to dish out, assign underloaded to max if (0 < regionsToMove.size()) { for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad(); if (regionCount >= max) { break; } addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } if (regionsToMove.isEmpty()) { break; } } } long endTime = System.currentTimeMillis(); if (!regionsToMove.isEmpty() || neededRegions != 0) { // Emit data so can diagnose how balancer went astray. LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded); StringBuilder sb = new StringBuilder(); for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) { if (sb.length() > 0) sb.append(", "); sb.append(e.getKey().toString()); sb.append(" "); sb.append(e.getValue().size()); } LOG.warn("Input " + sb.toString()); } // All done! LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return regionsToReturn; }
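The balancer above walks the same NavigableMap in both directions: serversByLoad.descendingMap().entrySet() to visit the most loaded servers first when shedding regions, and serversByLoad.entrySet() (ascending) to visit the least loaded servers first when filling. A reduced sketch of that two-direction pattern on a plain TreeMap keyed by load; the types and names here are illustrative stand-ins, not the HBase classes.

import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

public class LoadWalkSketch {
    public static void main(String[] args) {
        // key = region count (load), value = server name; TreeMap keeps ascending key order
        NavigableMap<Integer, String> serversByLoad = new TreeMap<>();
        serversByLoad.put(12, "rs1");
        serversByLoad.put(3, "rs2");
        serversByLoad.put(7, "rs3");

        // Shed from the most loaded first: descending view of the same map
        for (Map.Entry<Integer, String> e : serversByLoad.descendingMap().entrySet()) {
            System.out.println("overloaded candidate: " + e.getValue() + " load=" + e.getKey());
        }
        // Fill the least loaded first: ascending iteration
        for (Map.Entry<Integer, String> e : serversByLoad.entrySet()) {
            System.out.println("underloaded candidate: " + e.getValue() + " load=" + e.getKey());
        }
    }
}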
From source file: org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer.java
/** * Generate a global load balancing plan according to the specified map of * server information to the most loaded regions of each server. * * The load balancing invariant is that all servers are within 1 region of the * average number of regions per server. If the average is an integer number, * all servers will be balanced to the average. Otherwise, all servers will * have either floor(average) or ceiling(average) regions. * * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that * we can fetch from both ends of the queue. * At the beginning, we check whether there was empty region server * just discovered by Master. If so, we alternately choose new / old * regions from head / tail of regionsToMove, respectively. This alternation * avoids clustering young regions on the newly discovered region server. * Otherwise, we choose new regions from head of regionsToMove. * /* w w w . j a v a 2 s.co m*/ * Another improvement from HBASE-3609 is that we assign regions from * regionsToMove to underloaded servers in round-robin fashion. * Previously one underloaded server would be filled before we move onto * the next underloaded server, leading to clustering of young regions. * * Finally, we randomly shuffle underloaded servers so that they receive * offloaded regions relatively evenly across calls to balanceCluster(). * * The algorithm is currently implemented as such: * * <ol> * <li>Determine the two valid numbers of regions each server should have, * <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average). * * <li>Iterate down the most loaded servers, shedding regions from each so * each server hosts exactly <b>MAX</b> regions. Stop once you reach a * server that already has <= <b>MAX</b> regions. * <p> * Order the regions to move from most recent to least. * * <li>Iterate down the least loaded servers, assigning regions so each server * has exactly </b>MIN</b> regions. Stop once you reach a server that * already has >= <b>MIN</b> regions. * * Regions being assigned to underloaded servers are those that were shed * in the previous step. It is possible that there were not enough * regions shed to fill each underloaded server to <b>MIN</b>. If so we * end up with a number of regions required to do so, <b>neededRegions</b>. * * It is also possible that we were able to fill each underloaded but ended * up with regions that were unassigned from overloaded servers but that * still do not have assignment. * * If neither of these conditions hold (no regions needed to fill the * underloaded servers, no regions leftover from overloaded servers), * we are done and return. Otherwise we handle these cases below. * * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers), * we iterate the most loaded servers again, shedding a single server from * each (this brings them from having <b>MAX</b> regions to having * <b>MIN</b> regions). * * <li>We now definitely have more regions that need assignment, either from * the previous step or from the original shedding from overloaded servers. * Iterate the least loaded servers filling each to <b>MIN</b>. * * <li>If we still have more regions that need assignment, again iterate the * least loaded servers, this time giving each one (filling them to * </b>MAX</b>) until we run out. * * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions. * * In addition, any server hosting >= <b>MAX</b> regions is guaranteed * to end up with <b>MAX</b> regions at the end of the balancing. 
This * ensures the minimal number of regions possible are moved. * </ol> * * TODO: We can at-most reassign the number of regions away from a particular * server to be how many they report as most loaded. * Should we just keep all assignment in memory? Any objections? * Does this mean we need HeapSize on HMaster? Or just careful monitor? * (current thinking is we will hold all assignments in memory) * * @param clusterMap Map of regionservers and their load/region information to * a list of their most loaded regions * @return a list of regions to be moved, including source and destination, * or null if cluster is already balanced */ public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) { List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap); if (regionsToReturn != null) { return regionsToReturn; } filterExcludedServers(clusterMap); boolean emptyRegionServerPresent = false; long startTime = System.currentTimeMillis(); Collection<ServerName> backupMasters = getBackupMasters(); ClusterLoadState cs = new ClusterLoadState(masterServerName, backupMasters, backupMasterWeight, clusterMap); if (!this.needsBalance(cs)) return null; int numServers = cs.getNumServers(); NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad(); int numRegions = cs.getNumRegions(); float average = cs.getLoadAverage(); int max = (int) Math.ceil(average); int min = (int) average; // Using to check balance result. StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=") .append(numServers).append(", numBackupMasters=").append(cs.getNumBackupMasters()) .append(", backupMasterWeight=").append(backupMasterWeight).append(", max=").append(max) .append(", min=").append(min); LOG.debug(strBalanceParam.toString()); // Balance the cluster // TODO: Look at data block locality or a more complex load to do this MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create(); regionsToReturn = new ArrayList<RegionPlan>(); // Walk down most loaded, pruning each to the max int serversOverloaded = 0; // flag used to fetch regions from head and tail of list, alternately boolean fetchFromTail = false; Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>(); for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) { ServerAndLoad sal = server.getKey(); int load = sal.getLoad(); if (load <= max) { serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0)); break; } serversOverloaded++; List<HRegionInfo> regions = server.getValue(); int w = 1; // Normal region server has weight 1 if (backupMasters != null && backupMasters.contains(sal.getServerName())) { w = backupMasterWeight; // Backup master has heavier weight } int numToOffload = Math.min((load - max) / w, regions.size()); // account for the out-of-band regions which were assigned to this server // after some other region server crashed Collections.sort(regions, riComparator); int numTaken = 0; for (int i = 0; i <= numToOffload;) { HRegionInfo hri = regions.get(i); // fetch from head if (fetchFromTail) { hri = regions.get(regions.size() - 1 - i); } i++; // Don't rebalance special regions. 
if (shouldBeOnMaster(hri) && masterServerName.equals(sal.getServerName())) continue; regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null)); numTaken++; if (numTaken >= numToOffload) break; // fetch in alternate order if there is new region server if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } } serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken)); } int totalNumMoved = regionsToMove.size(); // Walk down least loaded, filling each to the min int neededRegions = 0; // number of regions needed to bring all up to min fetchFromTail = false; Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>(); int maxToTake = numRegions - min; for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { if (maxToTake == 0) break; // no more to take int load = server.getKey().getLoad(); if (load >= min && load > 0) { continue; // look for other servers which haven't reached min } int w = 1; // Normal region server has weight 1 if (backupMasters != null && backupMasters.contains(server.getKey().getServerName())) { w = backupMasterWeight; // Backup master has heavier weight } int regionsToPut = (min - load) / w; if (regionsToPut == 0) { regionsToPut = 1; } maxToTake -= regionsToPut; underloadedServers.put(server.getKey().getServerName(), regionsToPut); } // number of servers that get new regions int serversUnderloaded = underloadedServers.size(); int incr = 1; List<ServerName> sns = Arrays .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded])); Collections.shuffle(sns, RANDOM); while (regionsToMove.size() > 0) { int cnt = 0; int i = incr > 0 ? 0 : underloadedServers.size() - 1; for (; i >= 0 && i < underloadedServers.size(); i += incr) { if (regionsToMove.isEmpty()) break; ServerName si = sns.get(i); int numToTake = underloadedServers.get(si); if (numToTake == 0) continue; addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } underloadedServers.put(si, numToTake - 1); cnt++; BalanceInfo bi = serverBalanceInfo.get(si); if (bi == null) { bi = new BalanceInfo(0, 0); serverBalanceInfo.put(si, bi); } bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1); } if (cnt == 0) break; // iterates underloadedServers in the other direction incr = -incr; } for (Integer i : underloadedServers.values()) { // If we still want to take some, increment needed neededRegions += i; } // If none needed to fill all to min and none left to drain all to max, // we are done if (neededRegions == 0 && regionsToMove.isEmpty()) { long endTime = System.currentTimeMillis(); LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return regionsToReturn; } // Need to do a second pass. // Either more regions to assign out or servers that are still underloaded // If we need more to fill min, grab one from each most loaded until enough if (neededRegions != 0) { // Walk down most loaded, grabbing one from each until we get enough for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) { BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); int idx = balanceInfo == null ? 
0 : balanceInfo.getNextRegionForUnload(); if (idx >= server.getValue().size()) break; HRegionInfo region = server.getValue().get(idx); if (region.isMetaRegion()) continue; // Don't move meta regions. regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null)); totalNumMoved++; if (--neededRegions == 0) { // No more regions needed, done shedding break; } } } // Now we have a set of regions that must be all assigned out // Assign each underloaded up to the min, then if leftovers, assign to max // Walk down least loaded, assigning to each to fill up to min for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad(); if (regionCount >= min) break; BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); if (balanceInfo != null) { regionCount += balanceInfo.getNumRegionsAdded(); } if (regionCount >= min) { continue; } int numToTake = min - regionCount; int numTaken = 0; while (numTaken < numToTake && 0 < regionsToMove.size()) { addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn); numTaken++; if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } } } // If we still have regions to dish out, assign underloaded to max if (0 < regionsToMove.size()) { for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad(); BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); if (balanceInfo != null) { regionCount += balanceInfo.getNumRegionsAdded(); } if (regionCount >= max) { break; } addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } if (regionsToMove.isEmpty()) { break; } } } long endTime = System.currentTimeMillis(); if (!regionsToMove.isEmpty() || neededRegions != 0) { // Emit data so can diagnose how balancer went astray. LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded); StringBuilder sb = new StringBuilder(); for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) { if (sb.length() > 0) sb.append(", "); sb.append(e.getKey().toString()); sb.append(" "); sb.append(e.getValue().size()); } LOG.warn("Input " + sb.toString()); } // All done! LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return regionsToReturn; }
From source file: com.alibaba.wasp.master.balancer.DefaultLoadBalancer.java
/** * Generate a global load balancing plan according to the specified map of * server information to the most loaded entityGroups of each server. * //ww w.ja v a2s . c o m * The load balancing invariant is that all servers are within 1 entityGroup of the * average number of entityGroups per server. If the average is an integer number, * all servers will be balanced to the average. Otherwise, all servers will * have either floor(average) or ceiling(average) entityGroups. * * HBASE-3609 Modeled entityGroupsToMove using Guava's MinMaxPriorityQueue so that * we can fetch from both ends of the queue. At the beginning, we check * whether there was empty entityGroup server just discovered by Master. If so, we * alternately choose new / old entityGroups from head / tail of entityGroupsToMove, * respectively. This alternation avoids clustering young entityGroups on the newly * discovered entityGroup server. Otherwise, we choose new entityGroups from head of * entityGroupsToMove. * * Another improvement from HBASE-3609 is that we assign entityGroups from * entityGroupsToMove to underloaded servers in round-robin fashion. Previously one * underloaded server would be filled before we move onto the next underloaded * server, leading to clustering of young entityGroups. * * Finally, we randomly shuffle underloaded servers so that they receive * offloaded entityGroups relatively evenly across calls to balanceCluster(). * * The algorithm is currently implemented as such: * * <ol> * <li>Determine the two valid numbers of entityGroups each server should have, * <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average). * * <li>Iterate down the most loaded servers, shedding entityGroups from each so * each server hosts exactly <b>MAX</b> entityGroups. Stop once you reach a server * that already has <= <b>MAX</b> entityGroups. * <p> * Order the entityGroups to move from most recent to least. * * <li>Iterate down the least loaded servers, assigning entityGroups so each server * has exactly </b>MIN</b> entityGroups. Stop once you reach a server that already * has >= <b>MIN</b> entityGroups. * * EntityGroups being assigned to underloaded servers are those that were shed in * the previous step. It is possible that there were not enough entityGroups shed * to fill each underloaded server to <b>MIN</b>. If so we end up with a * number of entityGroups required to do so, <b>neededEntityGroups</b>. * * It is also possible that we were able to fill each underloaded but ended up * with entityGroups that were unassigned from overloaded servers but that still do * not have assignment. * * If neither of these conditions hold (no entityGroups needed to fill the * underloaded servers, no entityGroups leftover from overloaded servers), we are * done and return. Otherwise we handle these cases below. * * <li>If <b>neededEntityGroups</b> is non-zero (still have underloaded servers), * we iterate the most loaded servers again, shedding a single server from * each (this brings them from having <b>MAX</b> entityGroups to having <b>MIN</b> * entityGroups). * * <li>We now definitely have more entityGroups that need assignment, either from * the previous step or from the original shedding from overloaded servers. * Iterate the least loaded servers filling each to <b>MIN</b>. * * <li>If we still have more entityGroups that need assignment, again iterate the * least loaded servers, this time giving each one (filling them to * </b>MAX</b>) until we run out. * * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> entityGroups. 
* * In addition, any server hosting >= <b>MAX</b> entityGroups is guaranteed to * end up with <b>MAX</b> entityGroups at the end of the balancing. This ensures * the minimal number of entityGroups possible are moved. * </ol> * * TODO: We can at-most reassign the number of entityGroups away from a particular * server to be how many they report as most loaded. Should we just keep all * assignment in memory? Any objections? Does this mean we need HeapSize on * HMaster? Or just careful monitor? (current thinking is we will hold all * assignments in memory) * * @param clusterState Map of entityGroupservers and their load/entityGroup information * to a list of their most loaded entityGroups * @return a list of entityGroups to be moved, including source and destination, or * null if cluster is already balanced */ public List<EntityGroupPlan> balanceCluster(Map<ServerName, List<EntityGroupInfo>> clusterMap) { boolean emptyFServerPresent = false; long startTime = System.currentTimeMillis(); ClusterLoadState cs = new ClusterLoadState(clusterMap); int numServers = cs.getNumServers(); if (numServers == 0) { LOG.debug("numServers=0 so skipping load balancing"); return null; } NavigableMap<ServerAndLoad, List<EntityGroupInfo>> serversByLoad = cs.getServersByLoad(); int numEntityGroups = cs.getNumEntityGroups(); if (!this.needsBalance(cs)) { // Skipped because no server outside (min,max) range float average = cs.getLoadAverage(); // for logging LOG.info("Skipping load balancing because balanced cluster; " + "servers=" + numServers + " " + "entityGroups=" + numEntityGroups + " average=" + average + " " + "mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad()); return null; } int min = numEntityGroups / numServers; int max = numEntityGroups % numServers == 0 ? min : min + 1; // Using to check balance result. 
StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numEntityGroups=").append(numEntityGroups) .append(", numServers=").append(numServers).append(", max=").append(max).append(", min=") .append(min); LOG.debug(strBalanceParam.toString()); // Balance the cluster // TODO: Look at data block locality or a more complex load to do this MinMaxPriorityQueue<EntityGroupPlan> entityGroupsToMove = MinMaxPriorityQueue.orderedBy(rpComparator) .create(); List<EntityGroupPlan> entityGroupsToReturn = new ArrayList<EntityGroupPlan>(); // Walk down most loaded, pruning each to the max int serversOverloaded = 0; // flag used to fetch entityGroups from head and tail of list, alternately boolean fetchFromTail = false; Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>(); for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap().entrySet()) { ServerAndLoad sal = server.getKey(); int entityGroupCount = sal.getLoad(); if (entityGroupCount <= max) { serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0)); break; } serversOverloaded++; List<EntityGroupInfo> entityGroups = server.getValue(); int numToOffload = Math.min(entityGroupCount - max, entityGroups.size()); // account for the out-of-band entityGroups which were assigned to this server // after some other entityGroup server crashed Collections.sort(entityGroups, riComparator); int numTaken = 0; for (int i = 0; i <= numToOffload;) { EntityGroupInfo egInfo = entityGroups.get(i); // fetch from head if (fetchFromTail) { egInfo = entityGroups.get(entityGroups.size() - 1 - i); } i++; entityGroupsToMove.add(new EntityGroupPlan(egInfo, sal.getServerName(), null)); numTaken++; if (numTaken >= numToOffload) break; // fetch in alternate order if there is new entityGroup server if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } } serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken)); } int totalNumMoved = entityGroupsToMove.size(); // Walk down least loaded, filling each to the min int neededEntityGroups = 0; // number of entityGroups needed to bring all up to min fetchFromTail = false; Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>(); for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) { int entityGroupCount = server.getKey().getLoad(); if (entityGroupCount >= min) { break; } underloadedServers.put(server.getKey().getServerName(), min - entityGroupCount); } // number of servers that get new entityGroups int serversUnderloaded = underloadedServers.size(); int incr = 1; List<ServerName> sns = Arrays .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded])); Collections.shuffle(sns, RANDOM); while (entityGroupsToMove.size() > 0) { int cnt = 0; int i = incr > 0 ? 
0 : underloadedServers.size() - 1; for (; i >= 0 && i < underloadedServers.size(); i += incr) { if (entityGroupsToMove.isEmpty()) break; ServerName si = sns.get(i); int numToTake = underloadedServers.get(si); if (numToTake == 0) continue; addEntityGroupPlan(entityGroupsToMove, fetchFromTail, si, entityGroupsToReturn); if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } underloadedServers.put(si, numToTake - 1); cnt++; BalanceInfo bi = serverBalanceInfo.get(si); if (bi == null) { bi = new BalanceInfo(0, 0); serverBalanceInfo.put(si, bi); } bi.setNumEntityGroupsAdded(bi.getNumEntityGroupsAdded() + 1); } if (cnt == 0) break; // iterates underloadedServers in the other direction incr = -incr; } for (Integer i : underloadedServers.values()) { // If we still want to take some, increment needed neededEntityGroups += i; } // If none needed to fill all to min and none left to drain all to max, // we are done if (neededEntityGroups == 0 && entityGroupsToMove.isEmpty()) { long endTime = System.currentTimeMillis(); LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return entityGroupsToReturn; } // Need to do a second pass. // Either more entityGroups to assign out or servers that are still underloaded // If we need more to fill min, grab one from each most loaded until enough if (neededEntityGroups != 0) { // Walk down most loaded, grabbing one from each until we get enough for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap() .entrySet()) { BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); int idx = balanceInfo == null ? 
0 : balanceInfo.getNextEntityGroupForUnload(); if (idx >= server.getValue().size()) break; EntityGroupInfo entityGroup = server.getValue().get(idx); entityGroupsToMove.add(new EntityGroupPlan(entityGroup, server.getKey().getServerName(), null)); totalNumMoved++; if (--neededEntityGroups == 0) { // No more entityGroups needed, done shedding break; } } } // Now we have a set of entityGroups that must be all assigned out // Assign each underloaded up to the min, then if leftovers, assign to max // Walk down least loaded, assigning to each to fill up to min for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) { int entityGroupCount = server.getKey().getLoad(); if (entityGroupCount >= min) break; BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); if (balanceInfo != null) { entityGroupCount += balanceInfo.getNumEntityGroupsAdded(); } if (entityGroupCount >= min) { continue; } int numToTake = min - entityGroupCount; int numTaken = 0; while (numTaken < numToTake && 0 < entityGroupsToMove.size()) { addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(), entityGroupsToReturn); numTaken++; if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } } } // If we still have entityGroups to dish out, assign underloaded to max if (0 < entityGroupsToMove.size()) { for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) { int entityGroupCount = server.getKey().getLoad(); if (entityGroupCount >= max) { break; } addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(), entityGroupsToReturn); if (emptyFServerPresent) { fetchFromTail = !fetchFromTail; } if (entityGroupsToMove.isEmpty()) { break; } } } long endTime = System.currentTimeMillis(); if (!entityGroupsToMove.isEmpty() || neededEntityGroups != 0) { // Emit data so can diagnose how balancer went astray. LOG.warn("entityGroupsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded); StringBuilder sb = new StringBuilder(); for (Map.Entry<ServerName, List<EntityGroupInfo>> e : clusterMap.entrySet()) { if (sb.length() > 0) sb.append(", "); sb.append(e.getKey().toString()); sb.append(" "); sb.append(e.getValue().size()); } LOG.warn("Input " + sb.toString()); } // All done! LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return entityGroupsToReturn; }
From source file: org.apache.hadoop.hbase.coprocessor.transactional.SsccRegionEndpoint.java
/** * Add a write to the transaction.//ww w. ja v a 2 s . com * process. * @param long transactionId * @param Put put * @param boolean stateless // Is this a stateless put? * @return int * @throws IOException */ public int put(final long transactionId, final long startId, final Put put, boolean stateless) throws IOException { if (LOG.isTraceEnabled()) LOG.trace("Enter SsccRegionEndpoint coprocessor: put, txid " + transactionId + ", startId " + startId + ", stateless: " + stateless); SsccTransactionState state = this.beginTransIfNotExist(transactionId, startId); // check whether has del before state.removeDelBeforePut(put, stateless); /*need to change the timestamp, but HBase API does not support At this point, the solution is to create a new Put object */ //So the solution at this point is //add a mapping of current timestamp of the put row with the startId //mapStartIdFromTs(put.getTimeStamp(),startId); // try to use getFamilyCellMap to get out all data from the put object and generate a new one byte[] rowkey = put.getRow(); Put newPut = new Put(rowkey, startId); byte[] mergedCols = null; byte[] mergedColsV = null; byte[] cv = null; NavigableMap<byte[], List<Cell>> familyCellMap = put.getFamilyCellMap(); for (Entry<byte[], List<Cell>> entry : familyCellMap.entrySet()) { for (Iterator<Cell> iterator = entry.getValue().iterator(); iterator.hasNext();) { Cell cell = iterator.next(); byte[] family = CellUtil.cloneFamily(cell); byte[] qualifier = CellUtil.cloneQualifier(cell); mergedCols = null; mergedCols = byteMerger("|".getBytes(), qualifier); mergedCols = byteMerger(mergedCols, "|".getBytes()); byte[] value = CellUtil.cloneValue(cell); newPut.add(family, qualifier, startId, value); byte[] currentCollist = state.getColList(rowkey); if (indexOf(currentCollist, mergedCols) != -1) //already in this list { mergedColsV = byteMerger(currentCollist, null); continue; } mergedColsV = byteMerger(mergedCols, currentCollist); state.addToColList(rowkey, mergedColsV); } } //get the statusList Get statusGet = new Get(rowkey); //statusGet.setTimeStamp(startId); statusGet.addColumn(DtmConst.TRANSACTION_META_FAMILY, SsccConst.STATUS_COL); //statusGet.setTimeRange(0, startId + 1); //only get data updated before me //statusGet.setMaxVersions(DtmConst.MAX_VERSION); statusGet.setMaxVersions(); Result statusResult = m_Region.get(statusGet); List<Cell> sl = null; List<Cell> vl = null; //get the versionList // If this is a stateless put we don't need the version list if (stateless == false) { Get verGet = new Get(rowkey); //verGet.setTimeStamp(startId); verGet.addColumn(DtmConst.TRANSACTION_META_FAMILY, SsccConst.VERSION_COL); verGet.setMaxVersions(DtmConst.MAX_VERSION); Result verResult = m_Region.get(verGet); if (verResult != null) vl = verResult.listCells(); } if (statusResult != null) sl = statusResult.listCells(); if (LOG.isTraceEnabled()) LOG.trace("SsccRegionEndpoint coprocessor: put stateless: " + stateless); if (state.hasConflict(sl, vl, stateless, startId, transactionId) == false) { state.addToPutList(rowkey); //update status metadata byte[] statusValue; if (stateless) { statusValue = SsccConst.generateStatusValue(SsccConst.S_STATELESS_BYTE, transactionId); //stateless update } else { statusValue = SsccConst.generateStatusValue(SsccConst.S_STATEFUL_BYTE, transactionId); //stateful update } newPut.add(DtmConst.TRANSACTION_META_FAMILY, SsccConst.STATUS_COL, startId, statusValue); newPut.add(DtmConst.TRANSACTION_META_FAMILY, SsccConst.COLUMNS_COL, startId, mergedColsV); //perform the put operation, 
persistently save the data now. // LOG.info("UNIQUE: put ok " ); m_Region.put(newPut); return stateless ? STATELESS_UPDATE_OK : STATEFUL_UPDATE_OK; } else { //conflict // Return conflict, but don't trigger and abort. That needs to be triggered from the client, if desired. if (LOG.isTraceEnabled()) LOG.trace("UNIQUE: put STATEFUL_UPDATE_CONFLICT "); return stateless ? STATELESS_UPDATE_CONFLICT : STATEFUL_UPDATE_CONFLICT; } }
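The key entrySet() usage in put() above is the rebuild of the incoming Put at the transaction's start timestamp: put.getFamilyCellMap() is a NavigableMap from column family to the list of Cells written for that family, and every cell is re-added under the new timestamp. A hedged sketch of that step in isolation, using the same HBase client calls as the example (Put.add with an explicit timestamp is the older API shown above; newer clients spell it addColumn). The method and variable names are illustrative.

static Put restampPut(Put original, long startId) {
    Put rewritten = new Put(original.getRow(), startId);
    NavigableMap<byte[], List<Cell>> familyCellMap = original.getFamilyCellMap();
    for (Map.Entry<byte[], List<Cell>> entry : familyCellMap.entrySet()) {
        for (Cell cell : entry.getValue()) {
            byte[] family = CellUtil.cloneFamily(cell);
            byte[] qualifier = CellUtil.cloneQualifier(cell);
            byte[] value = CellUtil.cloneValue(cell);
            rewritten.add(family, qualifier, startId, value); // re-stamp each cell
        }
    }
    return rewritten;
}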
From source file: org.apache.tajo.storage.hbase.HBaseScanner.java
private Datum getDatum(Result result, int fieldId) throws IOException { byte[] value = null; if (isRowKeyMappings[fieldId]) { value = result.getRow();/*w ww . j a va 2 s. c om*/ if (!isBinaryColumns[fieldId] && rowKeyFieldIndexes[fieldId] >= 0) { int rowKeyFieldIndex = rowKeyFieldIndexes[fieldId]; byte[][] rowKeyFields = BytesUtils.splitPreserveAllTokens(value, rowKeyDelimiter, columnMapping.getNumColumns()); if (rowKeyFields.length < rowKeyFieldIndex) { return NullDatum.get(); } else { value = rowKeyFields[rowKeyFieldIndex]; } } } else { if (isColumnKeys[fieldId]) { NavigableMap<byte[], byte[]> cfMap = result.getFamilyMap(mappingColumnFamilies[fieldId][0]); if (cfMap != null) { Set<byte[]> keySet = cfMap.keySet(); if (keySet.size() == 1) { try { return HBaseTextSerializerDeserializer.deserialize(schemaColumns[fieldId], keySet.iterator().next()); } catch (Exception e) { LOG.error(e.getMessage(), e); throw new RuntimeException(e.getMessage(), e); } } else { StringBuilder sb = new StringBuilder(); sb.append("["); int count = 0; for (byte[] eachKey : keySet) { if (count > 0) { sb.append(", "); } Datum datum = HBaseTextSerializerDeserializer.deserialize(schemaColumns[fieldId], eachKey); sb.append("\"").append(datum.asChars()).append("\""); count++; if (count > MAX_LIST_SIZE) { break; } } sb.append("]"); return new TextDatum(sb.toString()); } } } else if (isColumnValues[fieldId]) { NavigableMap<byte[], byte[]> cfMap = result.getFamilyMap(mappingColumnFamilies[fieldId][0]); if (cfMap != null) { Collection<byte[]> valueList = cfMap.values(); if (valueList.size() == 1) { try { return HBaseTextSerializerDeserializer.deserialize(schemaColumns[fieldId], valueList.iterator().next()); } catch (Exception e) { LOG.error(e.getMessage(), e); throw new RuntimeException(e.getMessage(), e); } } else { StringBuilder sb = new StringBuilder(); sb.append("["); int count = 0; for (byte[] eachValue : valueList) { if (count > 0) { sb.append(", "); } Datum datum = HBaseTextSerializerDeserializer.deserialize(schemaColumns[fieldId], eachValue); sb.append("\"").append(datum.asChars()).append("\""); count++; if (count > MAX_LIST_SIZE) { break; } } sb.append("]"); return new TextDatum(sb.toString()); } } } else { if (mappingColumnFamilies[fieldId][1] == null) { NavigableMap<byte[], byte[]> cfMap = result.getFamilyMap(mappingColumnFamilies[fieldId][0]); if (cfMap != null && !cfMap.isEmpty()) { int count = 0; String delim = ""; if (cfMap.size() == 0) { return NullDatum.get(); } else if (cfMap.size() == 1) { // If a column family is mapped without column name like "cf1:" and the number of cells is one, // return value is flat format not json format. 
NavigableMap.Entry<byte[], byte[]> entry = cfMap.entrySet().iterator().next(); byte[] entryKey = entry.getKey(); byte[] entryValue = entry.getValue(); if (entryKey == null || entryKey.length == 0) { try { if (isBinaryColumns[fieldId]) { return HBaseBinarySerializerDeserializer.deserialize(schemaColumns[fieldId], entryValue); } else { return HBaseTextSerializerDeserializer.deserialize(schemaColumns[fieldId], entryValue); } } catch (Exception e) { LOG.error(e.getMessage(), e); throw new RuntimeException(e.getMessage(), e); } } } StringBuilder sb = new StringBuilder(); sb.append("{"); for (NavigableMap.Entry<byte[], byte[]> entry : cfMap.entrySet()) { byte[] entryKey = entry.getKey(); byte[] entryValue = entry.getValue(); String keyText = new String(entryKey); String valueText = null; if (entryValue != null) { try { if (isBinaryColumns[fieldId]) { valueText = HBaseBinarySerializerDeserializer .deserialize(schemaColumns[fieldId], entryValue).asChars(); } else { valueText = HBaseTextSerializerDeserializer .deserialize(schemaColumns[fieldId], entryValue).asChars(); } } catch (Exception e) { LOG.error(e.getMessage(), e); throw new RuntimeException(e.getMessage(), e); } } sb.append(delim).append("\"").append(keyText).append("\":\"").append(valueText) .append("\""); delim = ", "; count++; if (count > MAX_LIST_SIZE) { break; } } //end of for sb.append("}"); return new TextDatum(sb.toString()); } else { value = null; } } else { value = result.getValue(mappingColumnFamilies[fieldId][0], mappingColumnFamilies[fieldId][1]); } } } if (value == null) { return NullDatum.get(); } else { try { if (isBinaryColumns[fieldId]) { return HBaseBinarySerializerDeserializer.deserialize(schemaColumns[fieldId], value); } else { return HBaseTextSerializerDeserializer.deserialize(schemaColumns[fieldId], value); } } catch (Exception e) { LOG.error(e.getMessage(), e); throw new RuntimeException(e.getMessage(), e); } } }
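Several branches of getDatum() above iterate result.getFamilyMap(family), a NavigableMap<byte[], byte[]> mapping each qualifier in one column family to its latest value, and fold the entries into a JSON-like string. A simplified, hedged sketch of that fold (HBase client API as in the example; the entry cap and the method name are illustrative):

static String familyToJsonLike(Result result, byte[] family, int maxEntries) {
    NavigableMap<byte[], byte[]> cfMap = result.getFamilyMap(family);
    StringBuilder sb = new StringBuilder("{");
    String delim = "";
    int count = 0;
    for (Map.Entry<byte[], byte[]> entry : cfMap.entrySet()) {
        String key = Bytes.toString(entry.getKey());
        String value = entry.getValue() == null ? null : Bytes.toString(entry.getValue());
        sb.append(delim).append("\"").append(key).append("\":\"").append(value).append("\"");
        delim = ", ";
        if (++count > maxEntries) {
            break; // cap the output, as the example does with MAX_LIST_SIZE
        }
    }
    return sb.append("}").toString();
}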