/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.net;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.util.ReflectionUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**
 * Represents a cluster of computers with a tree-shaped, hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 * <p>Mutations ({@link #add(Node)}, {@link #remove(Node)}) take the write side
 * of {@link #netlock}; most queries take the read side.
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Unstable
public class NetworkTopology {
  public final static String DEFAULT_RACK = "/default-rack";
  public final static int DEFAULT_HOST_LEVEL = 2;
  public static final Log LOG =
    LogFactory.getLog(NetworkTopology.class);

  /** Thrown when an operation would leave, or finds, the topology unusable. */
  public static class InvalidTopologyException extends RuntimeException {
    private static final long serialVersionUID = 1L;
    public InvalidTopologyException(String msg) {
      super(msg);
    }
  }

  /**
   * Get an instance of NetworkTopology based on the value of the configuration
   * parameter net.topology.impl.
   *
   * @param conf the configuration to be used
   * @return an instance of NetworkTopology
   */
  public static NetworkTopology getInstance(Configuration conf){
    return ReflectionUtils.newInstance(
        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
        NetworkTopology.class, NetworkTopology.class), conf);
  }

  /**
   * InnerNode represents a switch/router of a data center or rack.
   * Different from a leaf node, it has non-null children.
   */
  static class InnerNode extends NodeBase {
    protected List<Node> children=new ArrayList<Node>();
    /** Number of leaves in the subtree rooted at this node. */
    private int numOfLeaves;

    /** Construct an InnerNode from a path-like string */
    InnerNode(String path) {
      super(path);
    }

    /** Construct an InnerNode from its name and its network location */
    InnerNode(String name, String location) {
      super(name, location);
    }

    /** Construct an InnerNode
     * from its name, its network location, its parent, and its level */
    InnerNode(String name, String location, InnerNode parent, int level) {
      super(name, location, parent, level);
    }

    /** @return its children (the live, internal list; not a copy) */
    List<Node> getChildren() {
      return children;
    }

    /** @return the number of children this node has */
    int getNumOfChildren() {
      return children.size();
    }

    /** Judge if this node represents a rack
     * @return true if it has no child or its children are not InnerNodes
     */
    boolean isRack() {
      if (children.isEmpty()) {
        return true;
      }

      // Only the first child is inspected to classify this node.
      Node firstChild = children.get(0);
      if (firstChild instanceof InnerNode) {
        return false;
      }

      return true;
    }

    /** Judge if this node is an ancestor of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is an ancestor of <i>n</i>
     */
    boolean isAncestor(Node n) {
      // The root is an ancestor of everything. Otherwise compare path
      // prefixes; the trailing separator on both sides stops "/rack1" from
      // matching "/rack10".
      return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
        (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
        startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
    }

    /** Judge if this node is the parent of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is the parent of <i>n</i>
     */
    boolean isParent(Node n) {
      return n.getNetworkLocation().equals(getPath(this));
    }

    /**
     * Return the name of the child of this node that is an ancestor of
     * node <i>n</i>.
     *
     * @param n a node located somewhere below this node
     * @return the first path component of <i>n</i>'s network location that
     *         follows this node's own path
     * @throws IllegalArgumentException if this node is not an ancestor of n
     */
    private String getNextAncestorName(Node n) {
      if (!isAncestor(n)) {
        throw new IllegalArgumentException(
                                           this + " is not an ancestor of " + n);
      }
      // Strip this node's path, then an optional leading separator, then
      // everything after the next separator.
      String name = n.getNetworkLocation().substring(getPath(this).length());
      if (name.charAt(0) == PATH_SEPARATOR) {
        name = name.substring(1);
      }
      int index=name.indexOf(PATH_SEPARATOR);
      if (index !=-1) {
        name = name.substring(0, index);
      }
      return name;
    }

    /** Add node <i>n</i> to the subtree of this node
     * @param n node to be added
     * @return true if the node is added as a new leaf; false if it only
     *         replaced an existing child with the same name
     * @throws IllegalArgumentException if this node is not an ancestor of n
     */
    boolean add(Node n) {
      if (!isAncestor(n)) {
        throw new IllegalArgumentException(n.getName()+", which is located at "
                +n.getNetworkLocation()+", is not a descendent of "
                +getPath(this));
      }
      if (isParent(n)) {
        // this node is the parent of n; add n directly
        n.setParent(this);
        n.setLevel(this.level+1);
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(n.getName())) {
            // same name already present: swap in the new instance, but the
            // leaf count does not change
            children.set(i, n);
            return false;
          }
        }
        children.add(n);
        numOfLeaves++;
        return true;
      } else {
        // find the next ancestor node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = null;
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(parentName)) {
            parentNode = (InnerNode)children.get(i);
            break;
          }
        }
        if (parentNode == null) {
          // create a new InnerNode
          parentNode = createParentNode(parentName);
          children.add(parentNode);
        }
        // add n to the subtree of the next ancestor node
        if (parentNode.add(n)) {
          numOfLeaves++;
          return true;
        } else {
          return false;
        }
      }
    }

    /**
     * Creates a parent node to be added to the list of children.
     * Creates a node using the InnerNode four argument constructor specifying
     * the name, location, parent, and level of this node.
     *
     * <p>To be overridden in subclasses for specific InnerNode implementations,
     * as alternative to overriding the full {@link #add(Node)} method.
     *
     * @param parentName The name of the parent node
     * @return A new inner node
     * @see InnerNode#InnerNode(String, String, InnerNode, int)
     */
    protected InnerNode createParentNode(String parentName) {
      return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
    }

    /** Remove node <i>n</i> from the subtree of this node
     * @param n node to be deleted
     * @return true if the node is deleted; false otherwise
     * @throws IllegalArgumentException if this node is not an ancestor of n
     */
    boolean remove(Node n) {
      String parent = n.getNetworkLocation();
      String currentPath = getPath(this);
      if (!isAncestor(n)) {
        throw new IllegalArgumentException(n.getName()
                                           +", which is located at "
                                           +parent+", is not a descendent of "+currentPath);
      }
      if (isParent(n)) {
        // this node is the parent of n; remove n directly
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(n.getName())) {
            children.remove(i);
            numOfLeaves--;
            n.setParent(null);
            return true;
          }
        }
        return false;
      } else {
        // find the next ancestor node: the parent node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = null;
        int i;
        for(i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(parentName)) {
            parentNode = (InnerNode)children.get(i);
            break;
          }
        }
        if (parentNode==null) {
          return false;
        }
        // remove n from the parent node
        boolean isRemoved = parentNode.remove(n);
        // if the parent node has no children, remove the parent node too
        if (isRemoved) {
          if (parentNode.getNumOfChildren() == 0) {
            // i still indexes parentNode because the search loop broke on it
            children.remove(i);
          }
          numOfLeaves--;
        }
        return isRemoved;
      }
    } // end of remove

    /** Given a node's string representation, return a reference to the node
     * @param loc string location of the form /rack/node, relative to this
     *            node; null or empty refers to this node itself
     * @return null if the node is not found or the childnode is there but
     * not an instance of {@link InnerNode}
     */
    private Node getLoc(String loc) {
      if (loc == null || loc.length() == 0) {
        return this;
      }

      // path[0] is the immediate child name, path[1] (if any) the remainder
      String[] path = loc.split(PATH_SEPARATOR_STR, 2);
      Node childnode = null;
      for(int i=0; i<children.size(); i++) {
        if (children.get(i).getName().equals(path[0])) {
          childnode = children.get(i);
          // add() keeps child names unique, so the first hit is the only hit
          break;
        }
      }
      if (childnode == null) {
        return null; // non-existing node
      }
      if (path.length == 1) {
        return childnode;
      }
      if (childnode instanceof InnerNode) {
        return ((InnerNode)childnode).getLoc(path[1]);
      } else {
        return null;
      }
    }

    /** get <i>leafIndex</i> leaf of this subtree
     * if it is not in the <i>excludedNode</i>
     *
     * @param leafIndex an indexed leaf of the node
     * @param excludedNode an excluded node (can be null)
     * @return the leaf at position <i>leafIndex</i> once the leaves under
     *         <i>excludedNode</i> are skipped, or null if the index is out of
     *         range
     */
    Node getLeaf(int leafIndex, Node excludedNode) {
      int count=0;
      // check if the excluded node a leaf
      boolean isLeaf =
        excludedNode == null || !(excludedNode instanceof InnerNode);
      // calculate the total number of excluded leaf nodes
      int numOfExcludedLeaves =
        isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
      if (isLeafParent()) { // children are leaves
        if (isLeaf) { // excluded node is a leaf node
          int excludedIndex = children.indexOf(excludedNode);
          if (excludedIndex != -1 && leafIndex >= 0) {
            // excluded node is one of the children so adjust the leaf index
            leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
          }
        }
        // range check
        if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
          return null;
        }
        return children.get(leafIndex);
      } else {
        for(int i=0; i<children.size(); i++) {
          InnerNode child = (InnerNode)children.get(i);
          if (excludedNode == null || excludedNode != child) {
            // not the excludedNode
            int childLeaves = child.getNumOfLeaves();
            if (excludedNode != null && child.isAncestor(excludedNode)) {
              childLeaves -= numOfExcludedLeaves;
            }
            if (count+childLeaves > leafIndex) {
              // the leaf is in the child subtree
              return child.getLeaf(leafIndex-count, excludedNode);
            } else {
              // go to the next child
              count = count+childLeaves;
            }
          } else { // it is the excludedNode
            // skip it and set the excludedNode to be null
            excludedNode = null;
          }
        }
        return null;
      }
    }

    /**
     * Tell {@link #getLeaf(int, Node)} whether this node's children are
     * leaves; the default implementation calls {@link #isRack()}.
     *
     * @return true if children are leaves, false otherwise
     */
    protected boolean isLeafParent() {
      return isRack();
    }

    /**
     * Determine if children are leaves, default implementation calls
     * {@link #isRack()}.
     * <p>To be overridden in subclasses for specific InnerNode implementations,
     * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
     *
     * @return true if children are leaves, false otherwise
     */
    protected boolean areChildrenLeaves() {
      return isRack();
    }

    /**
     * Get number of leaves.
     *
     * @return the number of leaves in the subtree rooted at this node
     */
    int getNumOfLeaves() {
      return numOfLeaves;
    }
  } // end of InnerNode

  /**
   * the root cluster map
   */
  InnerNode clusterMap;
  /** Depth of all leaf nodes; -1 until the first leaf has been added */
  private int depthOfAllLeaves = -1;
  /** rack counter */
  protected int numOfRacks = 0;

  /**
   * Whether or not this cluster has ever consisted of more than 1 rack,
   * according to the NetworkTopology.
   */
  private boolean clusterEverBeenMultiRack = false;

  /** the lock used to manage access */
  protected ReadWriteLock netlock = new ReentrantReadWriteLock();

  public NetworkTopology() {
    clusterMap = new InnerNode(InnerNode.ROOT);
  }

  /** Add a leaf node
   * Update node counter & rack counter if necessary
   * @param node node to be added; can be null
   * @exception IllegalArgumentException if add a node to a leaf
   *            or node to be added is not a leaf
   * @exception InvalidTopologyException if the node would sit at a different
   *            depth than the leaves already in the topology
   */
  public void add(Node node) {
    if (node==null) {
      return;
    }
    int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
    netlock.writeLock().lock();
    try {
      if( node instanceof InnerNode ) {
        throw new IllegalArgumentException(
          "Not allow to add an inner node: "+NodeBase.getPath(node));
      }
      if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
        LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
            " at depth " + newDepth + " to topology:\n" + this.toString());
        throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
            ": You cannot have a rack and a non-rack node at the same " +
            "level of the network topology.");
      }
      // Look the rack up BEFORE inserting, so that null means "new rack".
      Node rack = getNodeForNetworkLocation(node);
      if (rack != null && !(rack instanceof InnerNode)) {
        throw new IllegalArgumentException("Unexpected data node "
                                           + node.toString()
                                           + " at an illegal network location");
      }
      if (clusterMap.add(node)) {
        LOG.info("Adding a new node: "+NodeBase.getPath(node));
        if (rack == null) {
          incrementRacks();
        }
        // node is known to be a leaf here (inner nodes were rejected above)
        if (depthOfAllLeaves == -1) {
          depthOfAllLeaves = node.getLevel();
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /**
   * Increment the rack counter and remember if the cluster has now had more
   * than one rack.
   */
  protected void incrementRacks() {
    numOfRacks++;
    if (!clusterEverBeenMultiRack && numOfRacks > 1) {
      clusterEverBeenMultiRack = true;
    }
  }

  /**
   * Return a reference to the node given its string representation.
   * Default implementation delegates to {@link #getNode(String)}.
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full {@link #add(Node)}
   * method.
   *
   * @param node The string representation of this node's network location is
   * used to retrieve a Node object.
   * @return a reference to the node; null if the node is not in the tree
   *
   * @see #add(Node)
   * @see #getNode(String)
   */
  protected Node getNodeForNetworkLocation(Node node) {
    return getNode(node.getNetworkLocation());
  }

  /**
   * Given a string representation of a rack, return its children
   * @param loc a path-like string representation of a rack
   * @return a newly allocated list with all the node's children, or null if
   *         no node exists at <i>loc</i>
   */
  public List<Node> getDatanodesInRack(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc)) {
        // drop the leading separator; getLoc expects a root-relative path
        loc = loc.substring(1);
      }
      InnerNode rack = (InnerNode) clusterMap.getLoc(loc);
      if (rack == null) {
        return null;
      }
      return new ArrayList<Node>(rack.getChildren());
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Remove a node
   * Update node counter and rack counter if necessary
   * @param node node to be removed; can be null
   * @exception IllegalArgumentException if the node is an inner node
   */
  public void remove(Node node) {
    if (node==null) {
      return;
    }
    if( node instanceof InnerNode ) {
      throw new IllegalArgumentException(
        "Not allow to remove an inner node: "+NodeBase.getPath(node));
    }
    LOG.info("Removing a node: "+NodeBase.getPath(node));
    netlock.writeLock().lock();
    try {
      if (clusterMap.remove(node)) {
        // If the node's location no longer resolves, its rack was pruned
        // together with its last child.
        InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
        if (rack == null) {
          numOfRacks--;
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /** Check if the tree contains node <i>node</i>
   *
   * @param node a node
   * @return true if <i>node</i> is already in the tree; false otherwise
   */
  public boolean contains(Node node) {
    if (node == null) {
      return false;
    }
    netlock.readLock().lock();
    try {
      // Walk up the parent chain, at most node.getLevel() steps, looking for
      // this topology's root.
      Node parent = node.getParent();
      for (int level = node.getLevel(); parent != null && level > 0;
           parent = parent.getParent(), level--) {
        if (parent == clusterMap) {
          return true;
        }
      }
    } finally {
      netlock.readLock().unlock();
    }
    return false;
  }

  /** Given a string representation of a node, return its reference
   *
   * @param loc
   *          a path-like string representation of a node
   * @return a reference to the node; null if the node is not in the tree
   */
  public Node getNode(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc)) {
        // drop the leading separator; getLoc expects a root-relative path
        loc = loc.substring(1);
      }
      return clusterMap.getLoc(loc);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * @return true if this cluster has ever consisted of multiple racks, even if
   * it is not now a multi-rack cluster.
   */
  public boolean hasClusterEverBeenMultiRack() {
    return clusterEverBeenMultiRack;
  }

  /** Given a string representation of a network location, return the string
   * representation of its rack. The default implementation returns
   * <i>loc</i> unchanged.
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations.
   *
   * @param loc
   *          a path-like string representation of a network location
   * @return a rack string
   */
  public String getRack(String loc) {
    return loc;
  }

  /** @return the total number of racks */
  public int getNumOfRacks() {
    netlock.readLock().lock();
    try {
      return numOfRacks;
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** @return the total number of leaf nodes */
  public int getNumOfLeaves() {
    netlock.readLock().lock();
    try {
      return clusterMap.getNumOfLeaves();
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Return the distance between two nodes
   * It is assumed that the distance from one node to its parent is 1
   * The distance between two nodes is calculated by summing up their distances
   * to their closest common ancestor.
   * @param node1 one node
   * @param node2 another node
   * @return the distance between node1 and node2 which is zero if they are the same
   *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
   *  (including the case where exactly one of them is null)
   */
  public int getDistance(Node node1, Node node2) {
    if (node1 == node2) {
      return 0;
    }
    if (node1 == null || node2 == null) {
      // A null node cannot belong to the cluster; report it the same way as
      // any other unknown node instead of failing with a NullPointerException.
      LOG.warn("One of the nodes is a null pointer");
      return Integer.MAX_VALUE;
    }
    Node n1=node1, n2=node2;
    int dis = 0;
    netlock.readLock().lock();
    try {
      int level1=node1.getLevel(), level2=node2.getLevel();
      // Bring the deeper node up to the level of the shallower one ...
      while(n1!=null && level1>level2) {
        n1 = n1.getParent();
        level1--;
        dis++;
      }
      while(n2!=null && level2>level1) {
        n2 = n2.getParent();
        level2--;
        dis++;
      }
      // ... then climb both in lock-step until they share a parent.
      while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
        n1=n1.getParent();
        n2=n2.getParent();
        dis+=2;
      }
    } finally {
      netlock.readLock().unlock();
    }
    if (n1==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
      return Integer.MAX_VALUE;
    }
    if (n2==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
      return Integer.MAX_VALUE;
    }
    // the final hop from each node to the shared parent
    return dis+2;
  }

  /** Check if two nodes are on the same rack
   * @param node1 one node (can be null)
   * @param node2 another node (can be null)
   * @return true if node1 and node2 are on the same rack; false otherwise,
   *         including when either node is null
   */
  public boolean isOnSameRack( Node node1,  Node node2) {
    if (node1 == null || node2 == null) {
      return false;
    }

    netlock.readLock().lock();
    try {
      return isSameParents(node1, node2);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Check if network topology is aware of NodeGroup
   *
   * @return false in this implementation
   */
  public boolean isNodeGroupAware() {
    return false;
  }

  /**
   * Return false directly as not aware of NodeGroup, to be overridden in
   * sub-classes
   *
   * @param node1 one node
   * @param node2 another node
   * @return false in this implementation
   */
  public boolean isOnSameNodeGroup(Node node1, Node node2) {
    return false;
  }

  /**
   * Compare the parents of each node for equality
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full
   * {@link #isOnSameRack(Node, Node)} method.
   *
   * @param node1 the first node to compare
   * @param node2 the second node to compare
   * @return true if their parents are equal, false otherwise
   *
   * @see #isOnSameRack(Node, Node)
   */
  protected boolean isSameParents(Node node1, Node node2) {
    return node1.getParent()==node2.getParent();
  }

  private static final Random r = new Random();

  @VisibleForTesting
  void setRandomSeed(long seed) {
    r.setSeed(seed);
  }

  /** randomly choose one node from <i>scope</i>
   * if scope starts with ~, choose one from the all nodes except for the
   * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
   * @param scope range of nodes from which a node will be chosen
   * @return the chosen node
   */
  public Node chooseRandom(String scope) {
    netlock.readLock().lock();
    try {
      if (scope.startsWith("~")) {
        return chooseRandom(NodeBase.ROOT, scope.substring(1));
      } else {
        return chooseRandom(scope, null);
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Randomly choose one leaf under <i>scope</i> that is not under
   * <i>excludedScope</i>.
   *
   * @param scope path of the subtree to choose from
   * @param excludedScope path of a subtree to avoid; can be null
   * @return the chosen node; null if <i>scope</i> lies inside
   *         <i>excludedScope</i> or does not exist; the node at
   *         <i>scope</i> itself if it is not an inner node
   * @throws InvalidTopologyException if no candidate leaf remains
   */
  private Node chooseRandom(String scope, String excludedScope){
    if (excludedScope != null) {
      if (scope.startsWith(excludedScope)) {
        return null;
      }
      if (!excludedScope.startsWith(scope)) {
        // the excluded subtree is outside scope, so it excludes nothing
        excludedScope = null;
      }
    }
    Node node = getNode(scope);
    if (!(node instanceof InnerNode)) {
      return node;
    }
    InnerNode innerNode = (InnerNode)node;
    int numOfDatanodes = innerNode.getNumOfLeaves();
    if (excludedScope == null) {
      node = null;
    } else {
      node = getNode(excludedScope);
      if (!(node instanceof InnerNode)) {
        numOfDatanodes -= 1;
      } else {
        numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
      }
    }
    // "<= 0" rather than "== 0": the subtraction above can go negative, and
    // Random.nextInt rejects a non-positive bound with an unrelated
    // IllegalArgumentException.
    if (numOfDatanodes <= 0) {
      throw new InvalidTopologyException(
          "Failed to find datanode (scope=\"" + String.valueOf(scope) +
          "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
    }
    int leaveIndex = r.nextInt(numOfDatanodes);
    return innerNode.getLeaf(leaveIndex, node);
  }

  /** return leaves in <i>scope</i>
   * @param scope a path string
   * @return leaves nodes under specific scope; empty if no node exists at
   *         <i>scope</i>
   */
  public List<Node> getLeaves(String scope) {
    List<Node> leafNodes = new ArrayList<Node>();
    // Hold the read lock for the whole walk so the leaf count and the
    // indexed lookups all see the same tree.
    netlock.readLock().lock();
    try {
      Node node = getNode(scope);
      if (node instanceof InnerNode) {
        InnerNode innerNode = (InnerNode) node;
        int total = innerNode.getNumOfLeaves();
        for (int i = 0; i < total; i++) {
          leafNodes.add(innerNode.getLeaf(i, null));
        }
      } else if (node != null) {
        // scope names a single leaf
        leafNodes.add(node);
      }
    } finally {
      netlock.readLock().unlock();
    }
    return leafNodes;
  }

  /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
   * if scope starts with ~, return the number of nodes that are not
   * in <i>scope</i> and <i>excludedNodes</i>;
   * @param scope a path string that may start with ~
   * @param excludedNodes a list of nodes; null is treated as empty
   * @return number of available nodes
   */
  public int countNumOfAvailableNodes(String scope,
                                      Collection<Node> excludedNodes) {
    boolean isExcluded=false;
    if (scope.startsWith("~")) {
      isExcluded=true;
      scope=scope.substring(1);
    }
    scope = NodeBase.normalize(scope);
    int excludedCountInScope = 0; // excluded nodes that are inside scope
    int excludedCountOffScope = 0; // excluded nodes that are outside scope
    netlock.readLock().lock();
    try {
      if (excludedNodes != null) {
        for (Node node : excludedNodes) {
          // only count excluded nodes that are actually in the tree
          node = getNode(NodeBase.getPath(node));
          if (node == null) {
            continue;
          }
          if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR)
              .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) {
            excludedCountInScope++;
          } else {
            excludedCountOffScope++;
          }
        }
      }
      Node n = getNode(scope);
      int scopeNodeCount = 0;
      if (n != null) {
        scopeNodeCount++;
      }
      if (n instanceof InnerNode) {
        scopeNodeCount=((InnerNode)n).getNumOfLeaves();
      }
      if (isExcluded) {
        return clusterMap.getNumOfLeaves() - scopeNodeCount
            - excludedCountOffScope;
      } else {
        return scopeNodeCount - excludedCountInScope;
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** convert a network tree to a string */
  @Override
  public String toString() {
    // print the number of racks
    StringBuilder tree = new StringBuilder();
    tree.append("Number of racks: ");
    tree.append(numOfRacks);
    tree.append("\n");
    // print the number of leaves
    int numOfLeaves = getNumOfLeaves();
    tree.append("Expected number of leaves:");
    tree.append(numOfLeaves);
    tree.append("\n");
    // print nodes
    for(int i=0; i<numOfLeaves; i++) {
      tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
      tree.append("\n");
    }
    return tree.toString();
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the first part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the text before the last separator
   */
  public static String getFirstHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(0, index);
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the second part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the text from the last separator (inclusive) to the end
   */
  public static String getLastHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(index);
  }

  /**
   * Returns an integer weight which specifies how far away {node} is away from
   * {reader}. A lower value signifies that a node is closer.
   *
   * @param reader Node where data will be read
   * @param node Replica of data
   * @return weight
   */
  protected int getWeight(Node reader, Node node) {
    // 0 is local, 1 is same rack, 2 is off rack
    // Start off by initializing to off rack
    int weight = 2;
    if (reader != null) {
      if (reader.equals(node)) {
        weight = 0;
      } else if (isOnSameRack(reader, node)) {
        weight = 1;
      }
    }
    return weight;
  }

  /**
   * Sort nodes array by network distance to <i>reader</i>.
   * <p>
   * In a three-level topology, a node can be either local, on the same rack,
   * or on a different rack from the reader. Sorting the nodes based on network
   * distance from the reader reduces network traffic and improves
   * performance.
   * <p>
   * As an additional twist, we also randomize the nodes at each network
   * distance. This helps with load balancing when there is data skew.
   *
   * @param reader    Node where data will be read
   * @param nodes     Available replicas with the requested data
   * @param activeLen Number of active nodes at the front of the array
   */
  public void sortByDistance(Node reader, Node[] nodes, int activeLen) {
    // Sort weights for the nodes array
    int[] weights = new int[activeLen];
    for (int i=0; i<activeLen; i++) {
      weights[i] = getWeight(reader, nodes[i]);
    }
    // Add weight/node pairs to a TreeMap to sort
    TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
    for (int i=0; i<activeLen; i++) {
      int weight = weights[i];
      Node node = nodes[i];
      List<Node> list = tree.get(weight);
      if (list == null) {
        list = Lists.newArrayListWithExpectedSize(1);
        tree.put(weight, list);
      }
      list.add(node);
    }

    // Write the buckets back in ascending weight order, shuffling within
    // each bucket.
    int idx = 0;
    for (List<Node> list: tree.values()) {
      if (list != null) {
        Collections.shuffle(list, r);
        for (Node n: list) {
          nodes[idx] = n;
          idx++;
        }
      }
    }
    Preconditions.checkState(idx == activeLen,
        "Sorted the wrong number of nodes!");
  }
}