Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.net; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; /** The class represents a cluster of computer with a tree hierarchical * network topology. * For example, a cluster may be consists of many data centers filled * with racks of computers. * In a network topology, leaves represent data nodes (computers) and inner * nodes represent switches/routers that manage traffic in/out of data centers * or racks. * */ @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) @InterfaceStability.Unstable public class NetworkTopology { public final static String DEFAULT_RACK = "/default-rack"; public static final Logger LOG = LoggerFactory.getLogger(NetworkTopology.class); private static final char PATH_SEPARATOR = '/'; private static final String PATH_SEPARATOR_STR = "/"; private static final String ROOT = "/"; public static class InvalidTopologyException extends RuntimeException { private static final long serialVersionUID = 1L; public InvalidTopologyException(String msg) { super(msg); } } /** * Get an instance of NetworkTopology based on the value of the configuration * parameter net.topology.impl. * * @param conf the configuration to be used * @return an instance of NetworkTopology */ public static NetworkTopology getInstance(Configuration conf) { return getInstance(conf, InnerNodeImpl.FACTORY); } public static NetworkTopology getInstance(Configuration conf, InnerNode.Factory factory) { NetworkTopology nt = ReflectionUtils .newInstance(conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY, NetworkTopology.class, NetworkTopology.class), conf); return nt.init(factory); } protected NetworkTopology init(InnerNode.Factory factory) { if (!factory.equals(this.factory)) { // the constructor has initialized the factory to default. So only init // again if another factory is specified. this.factory = factory; this.clusterMap = factory.newInnerNode(NodeBase.ROOT); } return this; } InnerNode.Factory factory; /** * the root cluster map */ InnerNode clusterMap; /** Depth of all leaf nodes */ private int depthOfAllLeaves = -1; /** rack counter */ protected int numOfRacks = 0; /** * Whether or not this cluster has ever consisted of more than 1 rack, * according to the NetworkTopology. */ private boolean clusterEverBeenMultiRack = false; /** the lock used to manage access */ protected ReadWriteLock netlock = new ReentrantReadWriteLock(true); // keeping the constructor because other components like MR still uses this. public NetworkTopology() { this.factory = InnerNodeImpl.FACTORY; this.clusterMap = factory.newInnerNode(NodeBase.ROOT); } /** Add a leaf node * Update node counter & rack counter if necessary * @param node node to be added; can be null * @exception IllegalArgumentException if add a node to a leave or node to be added is not a leaf */ public void add(Node node) { if (node == null) return; int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1; netlock.writeLock().lock(); try { if (node instanceof InnerNode) { throw new IllegalArgumentException("Not allow to add an inner node: " + NodeBase.getPath(node)); } if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) { LOG.error("Error: can't add leaf node {} at depth {} to topology:{}\n", NodeBase.getPath(node), newDepth, this); throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) + ": You cannot have a rack and a non-rack node at the same " + "level of the network topology."); } Node rack = getNodeForNetworkLocation(node); if (rack != null && !(rack instanceof InnerNode)) { throw new IllegalArgumentException( "Unexpected data node " + node.toString() + " at an illegal network location"); } if (clusterMap.add(node)) { LOG.info("Adding a new node: " + NodeBase.getPath(node)); if (rack == null) { incrementRacks(); } if (!(node instanceof InnerNode)) { if (depthOfAllLeaves == -1) { depthOfAllLeaves = node.getLevel(); } } } LOG.debug("NetworkTopology became:\n{}", this); } finally { netlock.writeLock().unlock(); } } protected void incrementRacks() { numOfRacks++; if (!clusterEverBeenMultiRack && numOfRacks > 1) { clusterEverBeenMultiRack = true; } } /** * Return a reference to the node given its string representation. * Default implementation delegates to {@link #getNode(String)}. * * <p>To be overridden in subclasses for specific NetworkTopology * implementations, as alternative to overriding the full {@link #add(Node)} * method. * * @param node The string representation of this node's network location is * used to retrieve a Node object. * @return a reference to the node; null if the node is not in the tree * * @see #add(Node) * @see #getNode(String) */ protected Node getNodeForNetworkLocation(Node node) { return getNode(node.getNetworkLocation()); } /** * Given a string representation of a rack, return its children * @param loc a path-like string representation of a rack * @return a newly allocated list with all the node's children */ public List<Node> getDatanodesInRack(String loc) { netlock.readLock().lock(); try { loc = NodeBase.normalize(loc); if (!NodeBase.ROOT.equals(loc)) { loc = loc.substring(1); } InnerNode rack = (InnerNode) clusterMap.getLoc(loc); if (rack == null) { return null; } return new ArrayList<Node>(rack.getChildren()); } finally { netlock.readLock().unlock(); } } /** Remove a node * Update node counter and rack counter if necessary * @param node node to be removed; can be null */ public void remove(Node node) { if (node == null) return; if (node instanceof InnerNode) { throw new IllegalArgumentException("Not allow to remove an inner node: " + NodeBase.getPath(node)); } LOG.info("Removing a node: " + NodeBase.getPath(node)); netlock.writeLock().lock(); try { if (clusterMap.remove(node)) { InnerNode rack = (InnerNode) getNode(node.getNetworkLocation()); if (rack == null) { numOfRacks--; } } LOG.debug("NetworkTopology became:\n{}", this); } finally { netlock.writeLock().unlock(); } } /** Check if the tree contains node <i>node</i> * * @param node a node * @return true if <i>node</i> is already in the tree; false otherwise */ public boolean contains(Node node) { if (node == null) return false; netlock.readLock().lock(); try { Node parent = node.getParent(); for (int level = node.getLevel(); parent != null && level > 0; parent = parent.getParent(), level--) { if (parent == clusterMap) { return true; } } } finally { netlock.readLock().unlock(); } return false; } /** Given a string representation of a node, return its reference * * @param loc * a path-like string representation of a node * @return a reference to the node; null if the node is not in the tree */ public Node getNode(String loc) { netlock.readLock().lock(); try { loc = NodeBase.normalize(loc); if (!NodeBase.ROOT.equals(loc)) loc = loc.substring(1); return clusterMap.getLoc(loc); } finally { netlock.readLock().unlock(); } } /** * @return true if this cluster has ever consisted of multiple racks, even if * it is not now a multi-rack cluster. */ public boolean hasClusterEverBeenMultiRack() { return clusterEverBeenMultiRack; } /** Given a string representation of a rack for a specific network * location * * To be overridden in subclasses for specific NetworkTopology * implementations, as alternative to overriding the full * {@link #getRack(String)} method. * @param loc * a path-like string representation of a network location * @return a rack string */ public String getRack(String loc) { return loc; } /** @return the total number of racks */ public int getNumOfRacks() { netlock.readLock().lock(); try { return numOfRacks; } finally { netlock.readLock().unlock(); } } /** @return the total number of leaf nodes */ public int getNumOfLeaves() { netlock.readLock().lock(); try { return clusterMap.getNumOfLeaves(); } finally { netlock.readLock().unlock(); } } /** Return the distance between two nodes * It is assumed that the distance from one node to its parent is 1 * The distance between two nodes is calculated by summing up their distances * to their closest common ancestor. * @param node1 one node * @param node2 another node * @return the distance between node1 and node2 which is zero if they are the same * or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster */ public int getDistance(Node node1, Node node2) { if ((node1 != null && node1.equals(node2)) || (node1 == null && node2 == null)) { return 0; } if (node1 == null || node2 == null) { LOG.warn("One of the nodes is a null pointer"); return Integer.MAX_VALUE; } Node n1 = node1, n2 = node2; int dis = 0; netlock.readLock().lock(); try { int level1 = node1.getLevel(), level2 = node2.getLevel(); while (n1 != null && level1 > level2) { n1 = n1.getParent(); level1--; dis++; } while (n2 != null && level2 > level1) { n2 = n2.getParent(); level2--; dis++; } while (n1 != null && n2 != null && n1.getParent() != n2.getParent()) { n1 = n1.getParent(); n2 = n2.getParent(); dis += 2; } } finally { netlock.readLock().unlock(); } if (n1 == null) { LOG.warn("The cluster does not contain node: " + NodeBase.getPath(node1)); return Integer.MAX_VALUE; } if (n2 == null) { LOG.warn("The cluster does not contain node: " + NodeBase.getPath(node2)); return Integer.MAX_VALUE; } return dis + 2; } /** Return the distance between two nodes by comparing their network paths * without checking if they belong to the same ancestor node by reference. * It is assumed that the distance from one node to its parent is 1 * The distance between two nodes is calculated by summing up their distances * to their closest common ancestor. * @param node1 one node * @param node2 another node * @return the distance between node1 and node2 */ static public int getDistanceByPath(Node node1, Node node2) { if (node1 == null && node2 == null) { return 0; } if (node1 == null || node2 == null) { LOG.warn("One of the nodes is a null pointer"); return Integer.MAX_VALUE; } String[] paths1 = NodeBase.getPathComponents(node1); String[] paths2 = NodeBase.getPathComponents(node2); int dis = 0; int index = 0; int minLevel = Math.min(paths1.length, paths2.length); while (index < minLevel) { if (!paths1[index].equals(paths2[index])) { // Once the path starts to diverge, compute the distance that include // the rest of paths. dis += 2 * (minLevel - index); break; } index++; } dis += Math.abs(paths1.length - paths2.length); return dis; } /** Check if two nodes are on the same rack * @param node1 one node (can be null) * @param node2 another node (can be null) * @return true if node1 and node2 are on the same rack; false otherwise * @exception IllegalArgumentException when either node1 or node2 is null, or * node1 or node2 do not belong to the cluster */ public boolean isOnSameRack(Node node1, Node node2) { if (node1 == null || node2 == null) { return false; } netlock.readLock().lock(); try { return isSameParents(node1, node2); } finally { netlock.readLock().unlock(); } } /** * Check if network topology is aware of NodeGroup */ public boolean isNodeGroupAware() { return false; } /** * Return false directly as not aware of NodeGroup, to be override in sub-class */ public boolean isOnSameNodeGroup(Node node1, Node node2) { return false; } /** * Compare the parents of each node for equality * * <p>To be overridden in subclasses for specific NetworkTopology * implementations, as alternative to overriding the full * {@link #isOnSameRack(Node, Node)} method. * * @param node1 the first node to compare * @param node2 the second node to compare * @return true if their parents are equal, false otherwise * * @see #isOnSameRack(Node, Node) */ protected boolean isSameParents(Node node1, Node node2) { return node1.getParent() == node2.getParent(); } private static final Random r = new Random(); @VisibleForTesting void setRandomSeed(long seed) { r.setSeed(seed); } /** * Randomly choose a node. * * @param scope range of nodes from which a node will be chosen * @return the chosen node * * @see #chooseRandom(String, Collection) */ public Node chooseRandom(final String scope) { return chooseRandom(scope, null); } /** * Randomly choose one node from <i>scope</i>. * * If scope starts with ~, choose one from the all nodes except for the * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>. * If excludedNodes is given, choose a node that's not in excludedNodes. * * @param scope range of nodes from which a node will be chosen * @param excludedNodes nodes to be excluded from * @return the chosen node */ public Node chooseRandom(final String scope, final Collection<Node> excludedNodes) { netlock.readLock().lock(); try { if (scope.startsWith("~")) { return chooseRandom(NodeBase.ROOT, scope.substring(1), excludedNodes); } else { return chooseRandom(scope, null, excludedNodes); } } finally { netlock.readLock().unlock(); } } protected Node chooseRandom(final String scope, String excludedScope, final Collection<Node> excludedNodes) { if (excludedScope != null) { if (scope.startsWith(excludedScope)) { return null; } if (!excludedScope.startsWith(scope)) { excludedScope = null; } } Node node = getNode(scope); if (!(node instanceof InnerNode)) { return excludedNodes != null && excludedNodes.contains(node) ? null : node; } InnerNode innerNode = (InnerNode) node; int numOfDatanodes = innerNode.getNumOfLeaves(); if (excludedScope == null) { node = null; } else { node = getNode(excludedScope); if (!(node instanceof InnerNode)) { numOfDatanodes -= 1; } else { numOfDatanodes -= ((InnerNode) node).getNumOfLeaves(); } } if (numOfDatanodes <= 0) { LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")." + " numOfDatanodes={}", scope, excludedScope, numOfDatanodes); return null; } final int availableNodes; if (excludedScope == null) { availableNodes = countNumOfAvailableNodes(scope, excludedNodes); } else { availableNodes = countNumOfAvailableNodes("~" + excludedScope, excludedNodes); } LOG.debug( "Choosing random from {} available nodes on node {}," + " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.", availableNodes, innerNode, scope, excludedScope, excludedNodes, numOfDatanodes); Node ret = null; if (availableNodes > 0) { ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes, availableNodes); } LOG.debug("chooseRandom returning {}", ret); return ret; } /** * Randomly choose one node under <i>parentNode</i>, considering the exclude * nodes and scope. Should be called with {@link #netlock}'s readlock held. * * @param parentNode the parent node * @param excludedScopeNode the node corresponding to the exclude scope. * @param excludedNodes a collection of nodes to be excluded from * @param totalInScopeNodes total number of nodes under parentNode, excluding * the excludedScopeNode * @param availableNodes number of available nodes under parentNode that * could be chosen, excluding excludedNodes * @return the chosen node, or null if none can be chosen */ private Node chooseRandom(final InnerNode parentNode, final Node excludedScopeNode, final Collection<Node> excludedNodes, final int totalInScopeNodes, final int availableNodes) { Preconditions.checkArgument(totalInScopeNodes >= availableNodes && availableNodes > 0, String.format("%d should >= %d, and both should be positive.", totalInScopeNodes, availableNodes)); if (excludedNodes == null || excludedNodes.isEmpty()) { // if there are no excludedNodes, randomly choose a node final int index = r.nextInt(totalInScopeNodes); return parentNode.getLeaf(index, excludedScopeNode); } // excludedNodes non empty. // Choose the nth VALID node, where n is random. VALID meaning it can be // returned, after considering exclude scope and exclude nodes. // The probability of being chosen should be equal for all VALID nodes. // Notably, we do NOT choose nth node, and find the next valid node // if n is excluded - this will make the probability of the node immediately // after an excluded node higher. // // Start point is always 0 and that's fine, because the nth valid node // logic provides equal randomness. // // Consider this example, where 1,3,5 out of the 10 nodes are excluded: // 1 2 3 4 5 6 7 8 9 10 // x x x // We will randomly choose the nth valid node where n is [0,6]. // We do NOT choose a random number n and just use the closest valid node, // for example both n=3 and n=4 will choose 4, making it a 2/10 probability, // higher than the expected 1/7 // totalInScopeNodes=10 and availableNodes=7 in this example. int nthValidToReturn = r.nextInt(availableNodes); LOG.debug("nthValidToReturn is {}", nthValidToReturn); Node ret = parentNode.getLeaf(r.nextInt(totalInScopeNodes), excludedScopeNode); if (!excludedNodes.contains(ret)) { // return if we're lucky enough to get a valid node at a random first pick LOG.debug("Chosen node {} from first random", ret); return ret; } else { ret = null; } Node lastValidNode = null; for (int i = 0; i < totalInScopeNodes; ++i) { ret = parentNode.getLeaf(i, excludedScopeNode); if (!excludedNodes.contains(ret)) { if (nthValidToReturn == 0) { break; } --nthValidToReturn; lastValidNode = ret; } else { LOG.debug("Node {} is excluded, continuing.", ret); ret = null; } } if (ret == null && lastValidNode != null) { LOG.error( "BUG: Found lastValidNode {} but not nth valid node. " + "parentNode={}, excludedScopeNode={}, excludedNodes={}, " + "totalInScopeNodes={}, availableNodes={}, nthValidToReturn={}.", lastValidNode, parentNode, excludedScopeNode, excludedNodes, totalInScopeNodes, availableNodes, nthValidToReturn); ret = lastValidNode; } return ret; } /** return leaves in <i>scope</i> * @param scope a path string * @return leaves nodes under specific scope */ public List<Node> getLeaves(String scope) { Node node = getNode(scope); List<Node> leafNodes = new ArrayList<Node>(); if (!(node instanceof InnerNode)) { leafNodes.add(node); } else { InnerNode innerNode = (InnerNode) node; for (int i = 0; i < innerNode.getNumOfLeaves(); i++) { leafNodes.add(innerNode.getLeaf(i, null)); } } return leafNodes; } /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i> * if scope starts with ~, return the number of nodes that are not * in <i>scope</i> and <i>excludedNodes</i>; * @param scope a path string that may start with ~ * @param excludedNodes a list of nodes * @return number of available nodes */ @VisibleForTesting public int countNumOfAvailableNodes(String scope, Collection<Node> excludedNodes) { boolean isExcluded = false; if (scope.startsWith("~")) { isExcluded = true; scope = scope.substring(1); } scope = NodeBase.normalize(scope); int excludedCountInScope = 0; // the number of nodes in both scope & excludedNodes int excludedCountOffScope = 0; // the number of nodes outside scope & excludedNodes netlock.readLock().lock(); try { if (excludedNodes != null) { for (Node node : excludedNodes) { node = getNode(NodeBase.getPath(node)); if (node == null) { continue; } if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR) .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) { excludedCountInScope++; } else { excludedCountOffScope++; } } } Node n = getNode(scope); int scopeNodeCount = 0; if (n != null) { scopeNodeCount++; } if (n instanceof InnerNode) { scopeNodeCount = ((InnerNode) n).getNumOfLeaves(); } if (isExcluded) { return clusterMap.getNumOfLeaves() - scopeNodeCount - excludedCountOffScope; } else { return scopeNodeCount - excludedCountInScope; } } finally { netlock.readLock().unlock(); } } /** convert a network tree to a string. */ @Override public String toString() { // print the number of racks StringBuilder tree = new StringBuilder(); tree.append("Number of racks: "); tree.append(numOfRacks); tree.append("\n"); // print the number of leaves int numOfLeaves = getNumOfLeaves(); tree.append("Expected number of leaves:"); tree.append(numOfLeaves); tree.append("\n"); // print nodes for (int i = 0; i < numOfLeaves; i++) { tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null))); tree.append("\n"); } return tree.toString(); } /** * Divide networklocation string into two parts by last separator, and get * the first part here. * * @param networkLocation * @return */ public static String getFirstHalf(String networkLocation) { int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR); return networkLocation.substring(0, index); } /** * Divide networklocation string into two parts by last separator, and get * the second part here. * * @param networkLocation * @return */ public static String getLastHalf(String networkLocation) { int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR); return networkLocation.substring(index); } /** * Returns an integer weight which specifies how far away {node} is away from * {reader}. A lower value signifies that a node is closer. * * @param reader Node where data will be read * @param node Replica of data * @return weight */ protected int getWeight(Node reader, Node node) { // 0 is local, 2 is same rack, and each level on each node increases the //weight by 1 //Start off by initializing to Integer.MAX_VALUE int weight = Integer.MAX_VALUE; if (reader != null && node != null) { if (reader.equals(node)) { return 0; } int maxReaderLevel = reader.getLevel(); int maxNodeLevel = node.getLevel(); int currentLevelToCompare = maxReaderLevel > maxNodeLevel ? maxNodeLevel : maxReaderLevel; Node r = reader; Node n = node; weight = 0; while (r != null && r.getLevel() > currentLevelToCompare) { r = r.getParent(); weight++; } while (n != null && n.getLevel() > currentLevelToCompare) { n = n.getParent(); weight++; } while (r != null && n != null && !r.equals(n)) { r = r.getParent(); n = n.getParent(); weight += 2; } } return weight; } /** * Returns an integer weight which specifies how far away <i>node</i> is * from <i>reader</i>. A lower value signifies that a node is closer. * It uses network location to calculate the weight * * @param reader Node where data will be read * @param node Replica of data * @return weight */ private static int getWeightUsingNetworkLocation(Node reader, Node node) { //Start off by initializing to Integer.MAX_VALUE int weight = Integer.MAX_VALUE; if (reader != null && node != null) { String readerPath = normalizeNetworkLocationPath(reader.getNetworkLocation()); String nodePath = normalizeNetworkLocationPath(node.getNetworkLocation()); //same rack if (readerPath.equals(nodePath)) { if (reader.getName().equals(node.getName())) { weight = 0; } else { weight = 2; } } else { String[] readerPathToken = readerPath.split(PATH_SEPARATOR_STR); String[] nodePathToken = nodePath.split(PATH_SEPARATOR_STR); int maxLevelToCompare = readerPathToken.length > nodePathToken.length ? nodePathToken.length : readerPathToken.length; int currentLevel = 1; //traverse through the path and calculate the distance while (currentLevel < maxLevelToCompare) { if (!readerPathToken[currentLevel].equals(nodePathToken[currentLevel])) { break; } currentLevel++; } weight = (readerPathToken.length - currentLevel) + (nodePathToken.length - currentLevel); } } return weight; } /** Normalize a path by stripping off any trailing {@link #PATH_SEPARATOR}. * @param path path to normalize. * @return the normalised path * If <i>path</i>is null or empty {@link #ROOT} is returned * @throws IllegalArgumentException if the first character of a non empty path * is not {@link #PATH_SEPARATOR} */ private static String normalizeNetworkLocationPath(String path) { if (path == null || path.length() == 0) { return ROOT; } if (path.charAt(0) != PATH_SEPARATOR) { throw new IllegalArgumentException( "Network Location" + "path doesn't start with " + PATH_SEPARATOR + ": " + path); } int len = path.length(); if (path.charAt(len - 1) == PATH_SEPARATOR) { return path.substring(0, len - 1); } return path; } /** * Sort nodes array by network distance to <i>reader</i>. * <p/> * In a three-level topology, a node can be either local, on the same rack, * or on a different rack from the reader. Sorting the nodes based on network * distance from the reader reduces network traffic and improves * performance. * <p/> * As an additional twist, we also randomize the nodes at each network * distance. This helps with load balancing when there is data skew. * * @param reader Node where data will be read * @param nodes Available replicas with the requested data * @param activeLen Number of active nodes at the front of the array */ public void sortByDistance(Node reader, Node[] nodes, int activeLen) { /* * This method is called if the reader is a datanode, * so nonDataNodeReader flag is set to false. */ sortByDistance(reader, nodes, activeLen, false); } /** * Sort nodes array by network distance to <i>reader</i>. * <p/> using network location. This is used when the reader * is not a datanode. Sorting the nodes based on network distance * from the reader reduces network traffic and improves * performance. * <p/> * * @param reader Node where data will be read * @param nodes Available replicas with the requested data * @param activeLen Number of active nodes at the front of the array */ public void sortByDistanceUsingNetworkLocation(Node reader, Node[] nodes, int activeLen) { /* * This method is called if the reader is not a datanode, * so nonDataNodeReader flag is set to true. */ sortByDistance(reader, nodes, activeLen, true); } /** * Sort nodes array by network distance to <i>reader</i>. * <p/> * As an additional twist, we also randomize the nodes at each network * distance. This helps with load balancing when there is data skew. * * @param reader Node where data will be read * @param nodes Available replicas with the requested data * @param activeLen Number of active nodes at the front of the array * @param nonDataNodeReader True if the reader is not a datanode */ private void sortByDistance(Node reader, Node[] nodes, int activeLen, boolean nonDataNodeReader) { /** Sort weights for the nodes array */ int[] weights = new int[activeLen]; for (int i = 0; i < activeLen; i++) { if (nonDataNodeReader) { weights[i] = getWeightUsingNetworkLocation(reader, nodes[i]); } else { weights[i] = getWeight(reader, nodes[i]); } } // Add weight/node pairs to a TreeMap to sort TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>(); for (int i = 0; i < activeLen; i++) { int weight = weights[i]; Node node = nodes[i]; List<Node> list = tree.get(weight); if (list == null) { list = Lists.newArrayListWithExpectedSize(1); tree.put(weight, list); } list.add(node); } int idx = 0; for (List<Node> list : tree.values()) { if (list != null) { Collections.shuffle(list, r); for (Node n : list) { nodes[idx] = n; idx++; } } } Preconditions.checkState(idx == activeLen, "Sorted the wrong number of nodes!"); } }