/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.net;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.util.ReflectionUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/** The class represents a cluster of computers with a tree hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 * <p>Public accessors and mutators of this class guard the tree with
 * {@link #netlock}: readers take the read lock, {@link #add(Node)} and
 * {@link #remove(Node)} take the write lock.
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Unstable
public class NetworkTopology {
  public final static String DEFAULT_RACK = "/default-rack";
  public final static int DEFAULT_HOST_LEVEL = 2;
  public static final Log LOG =
    LogFactory.getLog(NetworkTopology.class);

  /** Thrown when an operation would leave the topology in an invalid shape. */
  public static class InvalidTopologyException extends RuntimeException {
    private static final long serialVersionUID = 1L;
    public InvalidTopologyException(String msg) {
      super(msg);
    }
  }

  /**
   * Get an instance of NetworkTopology based on the value of the configuration
   * parameter net.topology.impl.
   *
   * @param conf the configuration to be used
   * @return an instance of NetworkTopology
   */
  public static NetworkTopology getInstance(Configuration conf){
    return ReflectionUtils.newInstance(
        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
        NetworkTopology.class, NetworkTopology.class), conf);
  }

  /** InnerNode represents a switch/router of a data center or rack.
   * Different from a leaf node, it has non-null children.
   */
  static class InnerNode extends NodeBase {
    /** Direct children of this node; child names are unique in this list. */
    protected List<Node> children=new ArrayList<Node>();
    /** Number of leaves in the whole subtree rooted at this node. */
    private int numOfLeaves;

    /** Construct an InnerNode from a path-like string */
    InnerNode(String path) {
      super(path);
    }

    /** Construct an InnerNode from its name and its network location */
    InnerNode(String name, String location) {
      super(name, location);
    }

    /** Construct an InnerNode
     * from its name, its network location, its parent, and its level */
    InnerNode(String name, String location, InnerNode parent, int level) {
      super(name, location, parent, level);
    }

    /** @return its children (the live internal list, not a copy) */
    List<Node> getChildren() {return children;}

    /** @return the number of children this node has */
    int getNumOfChildren() {
      return children.size();
    }

    /** Judge if this node represents a rack
     * @return true if it has no child or its children are not InnerNodes
     */
    boolean isRack() {
      if (children.isEmpty()) {
        return true;
      }

      // Only the first child is inspected to decide what kind the children are.
      Node firstChild = children.get(0);
      if (firstChild instanceof InnerNode) {
        return false;
      }

      return true;
    }

    /** Judge if this node is an ancestor of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is an ancestor of <i>n</i>
     */
    boolean isAncestor(Node n) {
      // A trailing separator is appended to both sides so that "/rack1" is
      // not mistaken for an ancestor of a node located at "/rack10".
      return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
        (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
        startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
    }

    /** Judge if this node is the parent of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is the parent of <i>n</i>
     */
    boolean isParent(Node n) {
      return n.getNetworkLocation().equals(getPath(this));
    }

    /**
     * Return the name of the child of this node that is an ancestor of
     * node <i>n</i>.
     *
     * @param n a node located somewhere below this node
     * @return the first path component of <i>n</i>'s network location that
     *         follows this node's path
     * @throws IllegalArgumentException if this node is not an ancestor of n
     */
    private String getNextAncestorName(Node n) {
      if (!isAncestor(n)) {
        throw new IllegalArgumentException(
                                           this + " is not an ancestor of " + n);
      }
      // Strip this node's path, then an optional leading separator, then
      // everything after the next separator.
      String name = n.getNetworkLocation().substring(getPath(this).length());
      if (name.charAt(0) == PATH_SEPARATOR) {
        name = name.substring(1);
      }
      int index=name.indexOf(PATH_SEPARATOR);
      if (index !=-1)
        name = name.substring(0, index);
      return name;
    }

    /** Add node <i>n</i> to the subtree of this node
     * @param n node to be added
     * @return true if the node is added; false if a child with the same name
     *         already existed (in which case it is replaced by <i>n</i>)
     * @throws IllegalArgumentException if this node is not an ancestor of n
     */
    boolean add(Node n) {
      if (!isAncestor(n))
        throw new IllegalArgumentException(n.getName()+", which is located at "
                +n.getNetworkLocation()+", is not a decendent of "
                +getPath(this));
      if (isParent(n)) {
        // this node is the parent of n; add n directly
        n.setParent(this);
        n.setLevel(this.level+1);
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(n.getName())) {
            // same name already present: replace, leaf count is unchanged
            children.set(i, n);
            return false;
          }
        }
        children.add(n);
        numOfLeaves++;
        return true;
      } else {
        // find the next ancestor node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = null;
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(parentName)) {
            parentNode = (InnerNode)children.get(i);
            break;
          }
        }
        if (parentNode == null) {
          // create a new InnerNode
          parentNode = createParentNode(parentName);
          children.add(parentNode);
        }
        // add n to the subtree of the next ancestor node
        if (parentNode.add(n)) {
          numOfLeaves++;
          return true;
        } else {
          return false;
        }
      }
    }

    /**
     * Creates a parent node to be added to the list of children.
     * Creates a node using the InnerNode four argument constructor specifying
     * the name, location, parent, and level of this node.
     *
     * <p>To be overridden in subclasses for specific InnerNode implementations,
     * as alternative to overriding the full {@link #add(Node)} method.
     *
     * @param parentName The name of the parent node
     * @return A new inner node
     * @see InnerNode#InnerNode(String, String, InnerNode, int)
     */
    protected InnerNode createParentNode(String parentName) {
      return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
    }

    /** Remove node <i>n</i> from the subtree of this node
     * @param n node to be deleted
     * @return true if the node is deleted; false otherwise
     * @throws IllegalArgumentException if this node is not an ancestor of n
     */
    boolean remove(Node n) {
      String parent = n.getNetworkLocation();
      String currentPath = getPath(this);
      if (!isAncestor(n))
        throw new IllegalArgumentException(n.getName()
                                           +", which is located at "
                                           +parent+", is not a descendent of "+currentPath);
      if (isParent(n)) {
        // this node is the parent of n; remove n directly
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(n.getName())) {
            children.remove(i);
            numOfLeaves--;
            n.setParent(null);
            return true;
          }
        }
        return false;
      } else {
        // find the next ancestor node: the parent node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = null;
        int i;
        for(i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(parentName)) {
            parentNode = (InnerNode)children.get(i);
            break;
          }
        }
        if (parentNode==null) {
          return false;
        }
        // remove n from the parent node
        boolean isRemoved = parentNode.remove(n);
        // if the parent node has no children, remove the parent node too
        if (isRemoved) {
          if (parentNode.getNumOfChildren() == 0) {
            // i still holds the index at which parentNode was found
            children.remove(i);
          }
          numOfLeaves--;
        }
        return isRemoved;
      }
    } // end of remove

    /** Given a node's string representation, return a reference to the node
     * @param loc string location of the form /rack/node, relative to this
     *        node; null or empty means this node itself
     * @return null if the node is not found or the childnode is there but
     * not an instance of {@link InnerNode}
     */
    private Node getLoc(String loc) {
      if (loc == null || loc.length() == 0) return this;

      // path[0] is the name of the direct child, path[1] (if any) the rest
      String[] path = loc.split(PATH_SEPARATOR_STR, 2);
      Node childnode = null;
      for(int i=0; i<children.size(); i++) {
        if (children.get(i).getName().equals(path[0])) {
          childnode = children.get(i);
          // add(Node) keeps child names unique, so the first match is the
          // only match
          break;
        }
      }
      if (childnode == null) return null; // non-existing node
      if (path.length == 1) return childnode;
      if (childnode instanceof InnerNode) {
        return ((InnerNode)childnode).getLoc(path[1]);
      } else {
        return null;
      }
    }

    /** get <i>leafIndex</i> leaf of this subtree
     * if it is not in the <i>excludedNode</i>
     *
     * @param leafIndex an indexed leaf of the node
     * @param excludedNode an excluded node (can be null)
     * @return the leaf at position <i>leafIndex</i> among the leaves of this
     *         subtree that are not under <i>excludedNode</i>, or null if the
     *         index is out of range
     */
    Node getLeaf(int leafIndex, Node excludedNode) {
      int count=0;
      // check if the excluded node a leaf
      boolean isLeaf =
        excludedNode == null || !(excludedNode instanceof InnerNode);
      // calculate the total number of excluded leaf nodes
      int numOfExcludedLeaves =
        isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
      if (isLeafParent()) { // children are leaves
        if (isLeaf) { // excluded node is a leaf node
          int excludedIndex = children.indexOf(excludedNode);
          if (excludedIndex != -1 && leafIndex >= 0) {
            // excluded node is one of the children so adjust the leaf index
            leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
          }
        }
        // range check
        if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
          return null;
        }
        return children.get(leafIndex);
      } else {
        for(int i=0; i<children.size(); i++) {
          InnerNode child = (InnerNode)children.get(i);
          if (excludedNode == null || excludedNode != child) {
            // not the excludedNode
            int leavesInChild = child.getNumOfLeaves();
            if (excludedNode != null && child.isAncestor(excludedNode)) {
              leavesInChild -= numOfExcludedLeaves;
            }
            if (count+leavesInChild > leafIndex) {
              // the leaf is in the child subtree
              return child.getLeaf(leafIndex-count, excludedNode);
            } else {
              // go to the next child
              count = count+leavesInChild;
            }
          } else { // it is the excludedNode
            // skip it and set the excludedNode to be null
            excludedNode = null;
          }
        }
        return null;
      }
    }

    /**
     * Tells {@link #getLeaf(int, Node)} whether the children of this node are
     * leaves; the default implementation calls {@link #isRack()}.
     *
     * @return true if children are leaves, false otherwise
     */
    protected boolean isLeafParent() {
      return isRack();
    }

    /**
      * Determine if children are leaves, default implementation calls {@link #isRack()}
      * <p>To be overridden in subclasses for specific InnerNode implementations,
      * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
      *
      * <p>NOTE(review): within this file {@link #getLeaf(int, Node)} consults
      * {@link #isLeafParent()}, not this method; both are kept because either
      * may be overridden elsewhere.
      *
      * @return true if children are leaves, false otherwise
      */
    protected boolean areChildrenLeaves() {
      return isRack();
    }

    /**
     * Get number of leaves.
     */
    int getNumOfLeaves() {
      return numOfLeaves;
    }
  } // end of InnerNode

  /**
   * the root cluster map
   */
  InnerNode clusterMap;
  /** Depth of all leaf nodes */
  private int depthOfAllLeaves = -1;
  /** rack counter */
  protected int numOfRacks = 0;
  /** the lock used to manage access */
  protected ReadWriteLock netlock = new ReentrantReadWriteLock();

  public NetworkTopology() {
    clusterMap = new InnerNode(InnerNode.ROOT);
  }

  /** Add a leaf node
   * Update node counter & rack counter if necessary
   * @param node node to be added; can be null
   * @exception IllegalArgumentException if a node is added to a leaf,
   *            or the node to be added is not a leaf
   * @exception InvalidTopologyException if the node would sit at a depth
   *            different from the leaves already in the topology
   */
  public void add(Node node) {
    if (node==null) return;
    if( node instanceof InnerNode ) {
      throw new IllegalArgumentException(
        "Not allow to add an inner node: "+NodeBase.getPath(node));
    }
    int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
    netlock.writeLock().lock();
    try {
      if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
        // The topology dump is built only here, on the failure path, and
        // under the write lock so it reflects a consistent tree.
        LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
            " at depth " + newDepth + " to topology:\n" + this.toString());
        throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
            ": You cannot have a rack and a non-rack node at the same " +
            "level of the network topology.");
      }
      Node rack = getNodeForNetworkLocation(node);
      if (rack != null && !(rack instanceof InnerNode)) {
        throw new IllegalArgumentException("Unexpected data node "
                                           + node.toString()
                                           + " at an illegal network location");
      }
      if (clusterMap.add(node)) {
        LOG.info("Adding a new node: "+NodeBase.getPath(node));
        if (rack == null) {
          // the rack did not exist before this add, so it was just created
          numOfRacks++;
        }
        // node is known to be a leaf here (inner nodes were rejected above)
        if (depthOfAllLeaves == -1) {
          depthOfAllLeaves = node.getLevel();
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /**
   * Return a reference to the node given its string representation.
   * Default implementation delegates to {@link #getNode(String)}.
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full {@link #add(Node)}
   * method.
   *
   * @param node The string representation of this node's network location is
   * used to retrieve a Node object.
   * @return a reference to the node; null if the node is not in the tree
   *
   * @see #add(Node)
   * @see #getNode(String)
   */
  protected Node getNodeForNetworkLocation(Node node) {
    return getNode(node.getNetworkLocation());
  }

  /**
   * Given a string representation of a rack, return its children
   * @param loc a path-like string representation of a rack
   * @return a newly allocated list with all the node's children, or null if
   *         no node exists at <i>loc</i>
   * @throws ClassCastException if <i>loc</i> names a node that is not an
   *         inner node
   */
  public List<Node> getDatanodesInRack(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc)) {
        // drop the leading character so the path is relative to the root
        loc = loc.substring(1);
      }
      InnerNode rack = (InnerNode) clusterMap.getLoc(loc);
      if (rack == null) {
        return null;
      }
      return new ArrayList<Node>(rack.getChildren());
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Remove a node
   * Update node counter and rack counter if necessary
   * @param node node to be removed; can be null
   * @exception IllegalArgumentException if the node is an inner node
   */
  public void remove(Node node) {
    if (node==null) return;
    if( node instanceof InnerNode ) {
      throw new IllegalArgumentException(
        "Not allow to remove an inner node: "+NodeBase.getPath(node));
    }
    LOG.info("Removing a node: "+NodeBase.getPath(node));
    netlock.writeLock().lock();
    try {
      if (clusterMap.remove(node)) {
        InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
        if (rack == null) {
          // the rack disappeared together with its last node
          numOfRacks--;
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /** Check if the tree contains node <i>node</i>
   *
   * @param node a node
   * @return true if <i>node</i> is already in the tree; false otherwise
   */
  public boolean contains(Node node) {
    if (node == null) return false;
    netlock.readLock().lock();
    try {
      // Walk up the parent chain, at most getLevel() steps, looking for the
      // root of this topology.
      Node parent = node.getParent();
      for (int level = node.getLevel(); parent != null && level > 0;
           parent = parent.getParent(), level--) {
        if (parent == clusterMap) {
          return true;
        }
      }
    } finally {
      netlock.readLock().unlock();
    }
    return false;
  }

  /** Given a string representation of a node, return its reference
   *
   * @param loc
   *          a path-like string representation of a node
   * @return a reference to the node; null if the node is not in the tree
   */
  public Node getNode(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc))
        loc = loc.substring(1);
      return clusterMap.getLoc(loc);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Given a string representation of a network location, return the
   * string representation of its rack. The default implementation returns
   * <i>loc</i> unchanged.
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations.
   *
   * @param loc
   *          a path-like string representation of a network location
   * @return a rack string
   */
  public String getRack(String loc) {
    return loc;
  }

  /** @return the total number of racks */
  public int getNumOfRacks() {
    netlock.readLock().lock();
    try {
      return numOfRacks;
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** @return the total number of leaf nodes */
  public int getNumOfLeaves() {
    netlock.readLock().lock();
    try {
      return clusterMap.getNumOfLeaves();
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Return the distance between two nodes
   * It is assumed that the distance from one node to its parent is 1
   * The distance between two nodes is calculated by summing up their distances
   * to their closest common ancestor.
   * @param node1 one node
   * @param node2 another node
   * @return the distance between node1 and node2 which is zero if they are the same
   *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
   *  (including the case where exactly one of them is null)
   */
  public int getDistance(Node node1, Node node2) {
    if (node1 == node2) {
      return 0;
    }
    if (node1 == null || node2 == null) {
      // A null node cannot belong to the cluster; report it the same way as
      // any other unknown node instead of failing on getLevel().
      LOG.warn("One of the nodes is a null pointer");
      return Integer.MAX_VALUE;
    }
    Node n1=node1, n2=node2;
    int dis = 0;
    netlock.readLock().lock();
    try {
      int level1=node1.getLevel(), level2=node2.getLevel();
      // bring the deeper node up to the level of the shallower one
      while(n1!=null && level1>level2) {
        n1 = n1.getParent();
        level1--;
        dis++;
      }
      while(n2!=null && level2>level1) {
        n2 = n2.getParent();
        level2--;
        dis++;
      }
      // climb both sides in lockstep until they share a parent
      while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
        n1=n1.getParent();
        n2=n2.getParent();
        dis+=2;
      }
    } finally {
      netlock.readLock().unlock();
    }
    if (n1==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
      return Integer.MAX_VALUE;
    }
    if (n2==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
      return Integer.MAX_VALUE;
    }
    // the final hop from each side to the shared parent
    return dis+2;
  }

  /** Check if two nodes are on the same rack
   * @param node1 one node (can be null)
   * @param node2 another node (can be null)
   * @return true if node1 and node2 are on the same rack; false otherwise,
   *         including when either node is null
   */
  public boolean isOnSameRack( Node node1,  Node node2) {
    if (node1 == null || node2 == null) {
      return false;
    }

    netlock.readLock().lock();
    try {
      return isSameParents(node1, node2);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Check if network topology is aware of NodeGroup
   */
  public boolean isNodeGroupAware() {
    return false;
  }

  /**
   * Return false directly as not aware of NodeGroup, to be overridden in
   * sub-class
   */
  public boolean isOnSameNodeGroup(Node node1, Node node2) {
    return false;
  }

  /**
   * Compare the parents of each node for equality
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full
   * {@link #isOnSameRack(Node, Node)} method.
   *
   * @param node1 the first node to compare
   * @param node2 the second node to compare
   * @return true if their parents are equal, false otherwise
   *
   * @see #isOnSameRack(Node, Node)
   */
  protected boolean isSameParents(Node node1, Node node2) {
    return node1.getParent()==node2.getParent();
  }

  /** Shared random source for node selection and shuffling. */
  private static final Random r = new Random();

  @VisibleForTesting
  void setRandomSeed(long seed) {
    r.setSeed(seed);
  }

  /** randomly choose one node from <i>scope</i>
   * if scope starts with ~, choose one from the all nodes except for the
   * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
   * @param scope range of nodes from which a node will be chosen
   * @return the chosen node
   */
  public Node chooseRandom(String scope) {
    netlock.readLock().lock();
    try {
      if (scope.startsWith("~")) {
        return chooseRandom(NodeBase.ROOT, scope.substring(1));
      } else {
        return chooseRandom(scope, null);
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Randomly choose one leaf under <i>scope</i> that is not under
   * <i>excludedScope</i>.
   *
   * @param scope path of the subtree to choose from
   * @param excludedScope path of a subtree to leave out; can be null
   * @return the chosen node; null if <i>scope</i> itself lies inside
   *         <i>excludedScope</i>; the node at <i>scope</i> if it is not an
   *         inner node
   * @throws InvalidTopologyException if no candidate leaf remains
   */
  private Node chooseRandom(String scope, String excludedScope){
    if (excludedScope != null) {
      if (scope.startsWith(excludedScope)) {
        return null;
      }
      if (!excludedScope.startsWith(scope)) {
        // the excluded subtree is outside scope, so it excludes nothing
        excludedScope = null;
      }
    }
    Node node = getNode(scope);
    if (!(node instanceof InnerNode)) {
      return node;
    }
    InnerNode innerNode = (InnerNode)node;
    int numOfDatanodes = innerNode.getNumOfLeaves();
    if (excludedScope == null) {
      node = null;
    } else {
      // from here on, node is the excluded node rather than the scope node
      node = getNode(excludedScope);
      if (!(node instanceof InnerNode)) {
        numOfDatanodes -= 1;
      } else {
        numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
      }
    }
    if (numOfDatanodes == 0) {
      throw new InvalidTopologyException(
          "Failed to find datanode (scope=\"" + String.valueOf(scope) +
          "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
    }
    int leaveIndex = r.nextInt(numOfDatanodes);
    return innerNode.getLeaf(leaveIndex, node);
  }

  /** return leaves in <i>scope</i>
   * @param scope a path string
   * @return leaves nodes under specific scope; an empty list if no node
   *         exists at <i>scope</i>
   */
  public List<Node> getLeaves(String scope) {
    Node node = getNode(scope);
    List<Node> leafNodes = new ArrayList<Node>();
    if (node instanceof InnerNode) {
      InnerNode innerNode = (InnerNode) node;
      for (int i=0;i<innerNode.getNumOfLeaves();i++) {
        leafNodes.add(innerNode.getLeaf(i, null));
      }
    } else if (node != null) {
      // scope names a single leaf; an unknown scope contributes nothing
      leafNodes.add(node);
    }
    return leafNodes;
  }

  /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
   * if scope starts with ~, return the number of nodes that are not
   * in <i>scope</i> and <i>excludedNodes</i>;
   * @param scope a path string that may start with ~
   * @param excludedNodes a list of nodes
   * @return number of available nodes
   */
  public int countNumOfAvailableNodes(String scope,
                                      Collection<Node> excludedNodes) {
    boolean isExcluded=false;
    if (scope.startsWith("~")) {
      isExcluded=true;
      scope=scope.substring(1);
    }
    scope = NodeBase.normalize(scope);
    int excludedCountInScope = 0; // the number of nodes in both scope & excludedNodes
    int excludedCountOffScope = 0; // the number of nodes outside scope & excludedNodes
    netlock.readLock().lock();
    try {
      for (Node node : excludedNodes) {
        // only excluded nodes that are actually in the tree are counted
        node = getNode(NodeBase.getPath(node));
        if (node == null) {
          continue;
        }
        if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR)
            .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) {
          excludedCountInScope++;
        } else {
          excludedCountOffScope++;
        }
      }
      Node n = getNode(scope);
      int scopeNodeCount = 0;
      if (n != null) {
        scopeNodeCount++;
      }
      if (n instanceof InnerNode) {
        scopeNodeCount=((InnerNode)n).getNumOfLeaves();
      }
      if (isExcluded) {
        return clusterMap.getNumOfLeaves() - scopeNodeCount
            - excludedCountOffScope;
      } else {
        return scopeNodeCount - excludedCountInScope;
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** convert a network tree to a string */
  @Override
  public String toString() {
    // print the number of racks
    StringBuilder tree = new StringBuilder();
    tree.append("Number of racks: ");
    tree.append(numOfRacks);
    tree.append("\n");
    // print the number of leaves
    int numOfLeaves = getNumOfLeaves();
    tree.append("Expected number of leaves:");
    tree.append(numOfLeaves);
    tree.append("\n");
    // print nodes
    for(int i=0; i<numOfLeaves; i++) {
      tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
      tree.append("\n");
    }
    return tree.toString();
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the first part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the part of <i>networkLocation</i> before its last separator
   */
  public static String getFirstHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(0, index);
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the second part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the part of <i>networkLocation</i> starting at its last
   *         separator (the separator is included)
   */
  public static String getLastHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(index);
  }

  /**
   * Returns an integer weight which specifies how far away {node} is away from
   * {reader}. A lower value signifies that a node is closer.
   *
   * @param reader Node where data will be read
   * @param node Replica of data
   * @return weight
   */
  protected int getWeight(Node reader, Node node) {
    // 0 is local, 1 is same rack, 2 is off rack
    // Start off by initializing to off rack
    int weight = 2;
    if (reader != null) {
      if (reader == node) {
        weight = 0;
      } else if (isOnSameRack(reader, node)) {
        weight = 1;
      }
    }
    return weight;
  }

  /**
   * Sort nodes array by network distance to <i>reader</i>.
   * <p>
   * In a three-level topology, a node can be either local, on the same rack,
   * or on a different rack from the reader. Sorting the nodes based on network
   * distance from the reader reduces network traffic and improves
   * performance.
   * <p>
   * As an additional twist, we also randomize the nodes at each network
   * distance. This helps with load balancing when there is data skew.
   *
   * @param reader Node where data will be read
   * @param nodes Available replicas with the requested data
   * @param activeLen Number of active nodes at the front of the array
   */
  public void sortByDistance(Node reader, Node[] nodes, int activeLen) {
    // Group the active nodes by weight; the TreeMap keeps the groups in
    // ascending weight order.
    TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
    for (int i=0; i<activeLen; i++) {
      int weight = getWeight(reader, nodes[i]);
      List<Node> list = tree.get(weight);
      if (list == null) {
        list = Lists.newArrayListWithExpectedSize(1);
        tree.put(weight, list);
      }
      list.add(nodes[i]);
    }

    // Write the groups back closest-first, shuffling within each group.
    int idx = 0;
    for (List<Node> list: tree.values()) {
      Collections.shuffle(list, r);
      for (Node n: list) {
        nodes[idx] = n;
        idx++;
      }
    }
    Preconditions.checkState(idx == activeLen,
        "Sorted the wrong number of nodes!");
  }
}