001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.net;
019
020 import java.util.ArrayList;
021 import java.util.List;
022 import java.util.Collection;
023 import java.util.Collections;
024 import java.util.List;
025 import java.util.Random;
026 import java.util.TreeMap;
027 import java.util.concurrent.locks.ReadWriteLock;
028 import java.util.concurrent.locks.ReentrantReadWriteLock;
029
030 import com.google.common.annotations.VisibleForTesting;
031 import org.apache.commons.logging.Log;
032 import org.apache.commons.logging.LogFactory;
033 import org.apache.hadoop.classification.InterfaceAudience;
034 import org.apache.hadoop.classification.InterfaceStability;
035 import org.apache.hadoop.conf.Configuration;
036 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
037 import org.apache.hadoop.util.ReflectionUtils;
038
039 import com.google.common.base.Preconditions;
040 import com.google.common.collect.Lists;
041
/** The class represents a cluster of computers with a tree hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 */
051 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
052 @InterfaceStability.Unstable
053 public class NetworkTopology {
054 public final static String DEFAULT_RACK = "/default-rack";
055 public final static int DEFAULT_HOST_LEVEL = 2;
056 public static final Log LOG =
057 LogFactory.getLog(NetworkTopology.class);
058
059 public static class InvalidTopologyException extends RuntimeException {
060 private static final long serialVersionUID = 1L;
061 public InvalidTopologyException(String msg) {
062 super(msg);
063 }
064 }
065
066 /**
067 * Get an instance of NetworkTopology based on the value of the configuration
068 * parameter net.topology.impl.
069 *
070 * @param conf the configuration to be used
071 * @return an instance of NetworkTopology
072 */
073 public static NetworkTopology getInstance(Configuration conf){
074 return ReflectionUtils.newInstance(
075 conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
076 NetworkTopology.class, NetworkTopology.class), conf);
077 }
078
079 /** InnerNode represents a switch/router of a data center or rack.
080 * Different from a leaf node, it has non-null children.
081 */
082 static class InnerNode extends NodeBase {
083 protected List<Node> children=new ArrayList<Node>();
084 private int numOfLeaves;
085
086 /** Construct an InnerNode from a path-like string */
087 InnerNode(String path) {
088 super(path);
089 }
090
091 /** Construct an InnerNode from its name and its network location */
092 InnerNode(String name, String location) {
093 super(name, location);
094 }
095
096 /** Construct an InnerNode
097 * from its name, its network location, its parent, and its level */
098 InnerNode(String name, String location, InnerNode parent, int level) {
099 super(name, location, parent, level);
100 }
101
102 /** @return its children */
103 List<Node> getChildren() {return children;}
104
105 /** @return the number of children this node has */
106 int getNumOfChildren() {
107 return children.size();
108 }
109
110 /** Judge if this node represents a rack
111 * @return true if it has no child or its children are not InnerNodes
112 */
113 boolean isRack() {
114 if (children.isEmpty()) {
115 return true;
116 }
117
118 Node firstChild = children.get(0);
119 if (firstChild instanceof InnerNode) {
120 return false;
121 }
122
123 return true;
124 }
125
126 /** Judge if this node is an ancestor of node <i>n</i>
127 *
128 * @param n a node
129 * @return true if this node is an ancestor of <i>n</i>
130 */
131 boolean isAncestor(Node n) {
132 return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
133 (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
134 startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
135 }
136
137 /** Judge if this node is the parent of node <i>n</i>
138 *
139 * @param n a node
140 * @return true if this node is the parent of <i>n</i>
141 */
142 boolean isParent(Node n) {
143 return n.getNetworkLocation().equals(getPath(this));
144 }
145
146 /* Return a child name of this node who is an ancestor of node <i>n</i> */
147 private String getNextAncestorName(Node n) {
148 if (!isAncestor(n)) {
149 throw new IllegalArgumentException(
150 this + "is not an ancestor of " + n);
151 }
152 String name = n.getNetworkLocation().substring(getPath(this).length());
153 if (name.charAt(0) == PATH_SEPARATOR) {
154 name = name.substring(1);
155 }
156 int index=name.indexOf(PATH_SEPARATOR);
157 if (index !=-1)
158 name = name.substring(0, index);
159 return name;
160 }
161
162 /** Add node <i>n</i> to the subtree of this node
163 * @param n node to be added
164 * @return true if the node is added; false otherwise
165 */
166 boolean add(Node n) {
167 if (!isAncestor(n))
168 throw new IllegalArgumentException(n.getName()+", which is located at "
169 +n.getNetworkLocation()+", is not a decendent of "
170 +getPath(this));
171 if (isParent(n)) {
172 // this node is the parent of n; add n directly
173 n.setParent(this);
174 n.setLevel(this.level+1);
175 for(int i=0; i<children.size(); i++) {
176 if (children.get(i).getName().equals(n.getName())) {
177 children.set(i, n);
178 return false;
179 }
180 }
181 children.add(n);
182 numOfLeaves++;
183 return true;
184 } else {
185 // find the next ancestor node
186 String parentName = getNextAncestorName(n);
187 InnerNode parentNode = null;
188 for(int i=0; i<children.size(); i++) {
189 if (children.get(i).getName().equals(parentName)) {
190 parentNode = (InnerNode)children.get(i);
191 break;
192 }
193 }
194 if (parentNode == null) {
195 // create a new InnerNode
196 parentNode = createParentNode(parentName);
197 children.add(parentNode);
198 }
199 // add n to the subtree of the next ancestor node
200 if (parentNode.add(n)) {
201 numOfLeaves++;
202 return true;
203 } else {
204 return false;
205 }
206 }
207 }
208
209 /**
210 * Creates a parent node to be added to the list of children.
211 * Creates a node using the InnerNode four argument constructor specifying
212 * the name, location, parent, and level of this node.
213 *
214 * <p>To be overridden in subclasses for specific InnerNode implementations,
215 * as alternative to overriding the full {@link #add(Node)} method.
216 *
217 * @param parentName The name of the parent node
218 * @return A new inner node
219 * @see InnerNode#InnerNode(String, String, InnerNode, int)
220 */
221 protected InnerNode createParentNode(String parentName) {
222 return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
223 }
224
225 /** Remove node <i>n</i> from the subtree of this node
226 * @param n node to be deleted
227 * @return true if the node is deleted; false otherwise
228 */
229 boolean remove(Node n) {
230 String parent = n.getNetworkLocation();
231 String currentPath = getPath(this);
232 if (!isAncestor(n))
233 throw new IllegalArgumentException(n.getName()
234 +", which is located at "
235 +parent+", is not a descendent of "+currentPath);
236 if (isParent(n)) {
237 // this node is the parent of n; remove n directly
238 for(int i=0; i<children.size(); i++) {
239 if (children.get(i).getName().equals(n.getName())) {
240 children.remove(i);
241 numOfLeaves--;
242 n.setParent(null);
243 return true;
244 }
245 }
246 return false;
247 } else {
248 // find the next ancestor node: the parent node
249 String parentName = getNextAncestorName(n);
250 InnerNode parentNode = null;
251 int i;
252 for(i=0; i<children.size(); i++) {
253 if (children.get(i).getName().equals(parentName)) {
254 parentNode = (InnerNode)children.get(i);
255 break;
256 }
257 }
258 if (parentNode==null) {
259 return false;
260 }
261 // remove n from the parent node
262 boolean isRemoved = parentNode.remove(n);
263 // if the parent node has no children, remove the parent node too
264 if (isRemoved) {
265 if (parentNode.getNumOfChildren() == 0) {
266 children.remove(i);
267 }
268 numOfLeaves--;
269 }
270 return isRemoved;
271 }
272 } // end of remove
273
274 /** Given a node's string representation, return a reference to the node
275 * @param loc string location of the form /rack/node
276 * @return null if the node is not found or the childnode is there but
277 * not an instance of {@link InnerNode}
278 */
279 private Node getLoc(String loc) {
280 if (loc == null || loc.length() == 0) return this;
281
282 String[] path = loc.split(PATH_SEPARATOR_STR, 2);
283 Node childnode = null;
284 for(int i=0; i<children.size(); i++) {
285 if (children.get(i).getName().equals(path[0])) {
286 childnode = children.get(i);
287 }
288 }
289 if (childnode == null) return null; // non-existing node
290 if (path.length == 1) return childnode;
291 if (childnode instanceof InnerNode) {
292 return ((InnerNode)childnode).getLoc(path[1]);
293 } else {
294 return null;
295 }
296 }
297
298 /** get <i>leafIndex</i> leaf of this subtree
299 * if it is not in the <i>excludedNode</i>
300 *
301 * @param leafIndex an indexed leaf of the node
302 * @param excludedNode an excluded node (can be null)
303 * @return
304 */
305 Node getLeaf(int leafIndex, Node excludedNode) {
306 int count=0;
307 // check if the excluded node a leaf
308 boolean isLeaf =
309 excludedNode == null || !(excludedNode instanceof InnerNode);
310 // calculate the total number of excluded leaf nodes
311 int numOfExcludedLeaves =
312 isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
313 if (isLeafParent()) { // children are leaves
314 if (isLeaf) { // excluded node is a leaf node
315 int excludedIndex = children.indexOf(excludedNode);
316 if (excludedIndex != -1 && leafIndex >= 0) {
317 // excluded node is one of the children so adjust the leaf index
318 leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
319 }
320 }
321 // range check
322 if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
323 return null;
324 }
325 return children.get(leafIndex);
326 } else {
327 for(int i=0; i<children.size(); i++) {
328 InnerNode child = (InnerNode)children.get(i);
329 if (excludedNode == null || excludedNode != child) {
330 // not the excludedNode
331 int numOfLeaves = child.getNumOfLeaves();
332 if (excludedNode != null && child.isAncestor(excludedNode)) {
333 numOfLeaves -= numOfExcludedLeaves;
334 }
335 if (count+numOfLeaves > leafIndex) {
336 // the leaf is in the child subtree
337 return child.getLeaf(leafIndex-count, excludedNode);
338 } else {
339 // go to the next child
340 count = count+numOfLeaves;
341 }
342 } else { // it is the excluededNode
343 // skip it and set the excludedNode to be null
344 excludedNode = null;
345 }
346 }
347 return null;
348 }
349 }
350
351 protected boolean isLeafParent() {
352 return isRack();
353 }
354
355 /**
356 * Determine if children a leaves, default implementation calls {@link #isRack()}
357 * <p>To be overridden in subclasses for specific InnerNode implementations,
358 * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
359 *
360 * @return true if children are leaves, false otherwise
361 */
362 protected boolean areChildrenLeaves() {
363 return isRack();
364 }
365
366 /**
367 * Get number of leaves.
368 */
369 int getNumOfLeaves() {
370 return numOfLeaves;
371 }
372 } // end of InnerNode
373
374 /**
375 * the root cluster map
376 */
377 InnerNode clusterMap;
378 /** Depth of all leaf nodes */
379 private int depthOfAllLeaves = -1;
380 /** rack counter */
381 protected int numOfRacks = 0;
382 /** the lock used to manage access */
383 protected ReadWriteLock netlock = new ReentrantReadWriteLock();
384
385 public NetworkTopology() {
386 clusterMap = new InnerNode(InnerNode.ROOT);
387 }
388
389 /** Add a leaf node
390 * Update node counter & rack counter if necessary
391 * @param node node to be added; can be null
392 * @exception IllegalArgumentException if add a node to a leave
393 or node to be added is not a leaf
394 */
395 public void add(Node node) {
396 if (node==null) return;
397 String oldTopoStr = this.toString();
398 if( node instanceof InnerNode ) {
399 throw new IllegalArgumentException(
400 "Not allow to add an inner node: "+NodeBase.getPath(node));
401 }
402 int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
403 netlock.writeLock().lock();
404 try {
405 if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
406 LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
407 " at depth " + newDepth + " to topology:\n" + oldTopoStr);
408 throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
409 ": You cannot have a rack and a non-rack node at the same " +
410 "level of the network topology.");
411 }
412 Node rack = getNodeForNetworkLocation(node);
413 if (rack != null && !(rack instanceof InnerNode)) {
414 throw new IllegalArgumentException("Unexpected data node "
415 + node.toString()
416 + " at an illegal network location");
417 }
418 if (clusterMap.add(node)) {
419 LOG.info("Adding a new node: "+NodeBase.getPath(node));
420 if (rack == null) {
421 numOfRacks++;
422 }
423 if (!(node instanceof InnerNode)) {
424 if (depthOfAllLeaves == -1) {
425 depthOfAllLeaves = node.getLevel();
426 }
427 }
428 }
429 if(LOG.isDebugEnabled()) {
430 LOG.debug("NetworkTopology became:\n" + this.toString());
431 }
432 } finally {
433 netlock.writeLock().unlock();
434 }
435 }
436
437 /**
438 * Return a reference to the node given its string representation.
439 * Default implementation delegates to {@link #getNode(String)}.
440 *
441 * <p>To be overridden in subclasses for specific NetworkTopology
442 * implementations, as alternative to overriding the full {@link #add(Node)}
443 * method.
444 *
445 * @param node The string representation of this node's network location is
446 * used to retrieve a Node object.
447 * @return a reference to the node; null if the node is not in the tree
448 *
449 * @see #add(Node)
450 * @see #getNode(String)
451 */
452 protected Node getNodeForNetworkLocation(Node node) {
453 return getNode(node.getNetworkLocation());
454 }
455
456 /**
457 * Given a string representation of a rack, return its children
458 * @param loc a path-like string representation of a rack
459 * @return a newly allocated list with all the node's children
460 */
461 public List<Node> getDatanodesInRack(String loc) {
462 netlock.readLock().lock();
463 try {
464 loc = NodeBase.normalize(loc);
465 if (!NodeBase.ROOT.equals(loc)) {
466 loc = loc.substring(1);
467 }
468 InnerNode rack = (InnerNode) clusterMap.getLoc(loc);
469 if (rack == null) {
470 return null;
471 }
472 return new ArrayList<Node>(rack.getChildren());
473 } finally {
474 netlock.readLock().unlock();
475 }
476 }
477
478 /** Remove a node
479 * Update node counter and rack counter if necessary
480 * @param node node to be removed; can be null
481 */
482 public void remove(Node node) {
483 if (node==null) return;
484 if( node instanceof InnerNode ) {
485 throw new IllegalArgumentException(
486 "Not allow to remove an inner node: "+NodeBase.getPath(node));
487 }
488 LOG.info("Removing a node: "+NodeBase.getPath(node));
489 netlock.writeLock().lock();
490 try {
491 if (clusterMap.remove(node)) {
492 InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
493 if (rack == null) {
494 numOfRacks--;
495 }
496 }
497 if(LOG.isDebugEnabled()) {
498 LOG.debug("NetworkTopology became:\n" + this.toString());
499 }
500 } finally {
501 netlock.writeLock().unlock();
502 }
503 }
504
505 /** Check if the tree contains node <i>node</i>
506 *
507 * @param node a node
508 * @return true if <i>node</i> is already in the tree; false otherwise
509 */
510 public boolean contains(Node node) {
511 if (node == null) return false;
512 netlock.readLock().lock();
513 try {
514 Node parent = node.getParent();
515 for (int level = node.getLevel(); parent != null && level > 0;
516 parent = parent.getParent(), level--) {
517 if (parent == clusterMap) {
518 return true;
519 }
520 }
521 } finally {
522 netlock.readLock().unlock();
523 }
524 return false;
525 }
526
527 /** Given a string representation of a node, return its reference
528 *
529 * @param loc
530 * a path-like string representation of a node
531 * @return a reference to the node; null if the node is not in the tree
532 */
533 public Node getNode(String loc) {
534 netlock.readLock().lock();
535 try {
536 loc = NodeBase.normalize(loc);
537 if (!NodeBase.ROOT.equals(loc))
538 loc = loc.substring(1);
539 return clusterMap.getLoc(loc);
540 } finally {
541 netlock.readLock().unlock();
542 }
543 }
544
545 /** Given a string representation of a rack for a specific network
546 * location
547 *
548 * To be overridden in subclasses for specific NetworkTopology
549 * implementations, as alternative to overriding the full
550 * {@link #getRack(String)} method.
551 * @param loc
552 * a path-like string representation of a network location
553 * @return a rack string
554 */
555 public String getRack(String loc) {
556 return loc;
557 }
558
559 /** @return the total number of racks */
560 public int getNumOfRacks() {
561 netlock.readLock().lock();
562 try {
563 return numOfRacks;
564 } finally {
565 netlock.readLock().unlock();
566 }
567 }
568
569 /** @return the total number of leaf nodes */
570 public int getNumOfLeaves() {
571 netlock.readLock().lock();
572 try {
573 return clusterMap.getNumOfLeaves();
574 } finally {
575 netlock.readLock().unlock();
576 }
577 }
578
579 /** Return the distance between two nodes
580 * It is assumed that the distance from one node to its parent is 1
581 * The distance between two nodes is calculated by summing up their distances
582 * to their closest common ancestor.
583 * @param node1 one node
584 * @param node2 another node
585 * @return the distance between node1 and node2 which is zero if they are the same
586 * or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
587 */
588 public int getDistance(Node node1, Node node2) {
589 if (node1 == node2) {
590 return 0;
591 }
592 Node n1=node1, n2=node2;
593 int dis = 0;
594 netlock.readLock().lock();
595 try {
596 int level1=node1.getLevel(), level2=node2.getLevel();
597 while(n1!=null && level1>level2) {
598 n1 = n1.getParent();
599 level1--;
600 dis++;
601 }
602 while(n2!=null && level2>level1) {
603 n2 = n2.getParent();
604 level2--;
605 dis++;
606 }
607 while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
608 n1=n1.getParent();
609 n2=n2.getParent();
610 dis+=2;
611 }
612 } finally {
613 netlock.readLock().unlock();
614 }
615 if (n1==null) {
616 LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
617 return Integer.MAX_VALUE;
618 }
619 if (n2==null) {
620 LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
621 return Integer.MAX_VALUE;
622 }
623 return dis+2;
624 }
625
626 /** Check if two nodes are on the same rack
627 * @param node1 one node (can be null)
628 * @param node2 another node (can be null)
629 * @return true if node1 and node2 are on the same rack; false otherwise
630 * @exception IllegalArgumentException when either node1 or node2 is null, or
631 * node1 or node2 do not belong to the cluster
632 */
633 public boolean isOnSameRack( Node node1, Node node2) {
634 if (node1 == null || node2 == null) {
635 return false;
636 }
637
638 netlock.readLock().lock();
639 try {
640 return isSameParents(node1, node2);
641 } finally {
642 netlock.readLock().unlock();
643 }
644 }
645
646 /**
647 * Check if network topology is aware of NodeGroup
648 */
649 public boolean isNodeGroupAware() {
650 return false;
651 }
652
653 /**
654 * Return false directly as not aware of NodeGroup, to be override in sub-class
655 */
656 public boolean isOnSameNodeGroup(Node node1, Node node2) {
657 return false;
658 }
659
660 /**
661 * Compare the parents of each node for equality
662 *
663 * <p>To be overridden in subclasses for specific NetworkTopology
664 * implementations, as alternative to overriding the full
665 * {@link #isOnSameRack(Node, Node)} method.
666 *
667 * @param node1 the first node to compare
668 * @param node2 the second node to compare
669 * @return true if their parents are equal, false otherwise
670 *
671 * @see #isOnSameRack(Node, Node)
672 */
673 protected boolean isSameParents(Node node1, Node node2) {
674 return node1.getParent()==node2.getParent();
675 }
676
677 private static final Random r = new Random();
678
679 @VisibleForTesting
680 void setRandomSeed(long seed) {
681 r.setSeed(seed);
682 }
683
684 /** randomly choose one node from <i>scope</i>
685 * if scope starts with ~, choose one from the all nodes except for the
686 * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
687 * @param scope range of nodes from which a node will be chosen
688 * @return the chosen node
689 */
690 public Node chooseRandom(String scope) {
691 netlock.readLock().lock();
692 try {
693 if (scope.startsWith("~")) {
694 return chooseRandom(NodeBase.ROOT, scope.substring(1));
695 } else {
696 return chooseRandom(scope, null);
697 }
698 } finally {
699 netlock.readLock().unlock();
700 }
701 }
702
703 private Node chooseRandom(String scope, String excludedScope){
704 if (excludedScope != null) {
705 if (scope.startsWith(excludedScope)) {
706 return null;
707 }
708 if (!excludedScope.startsWith(scope)) {
709 excludedScope = null;
710 }
711 }
712 Node node = getNode(scope);
713 if (!(node instanceof InnerNode)) {
714 return node;
715 }
716 InnerNode innerNode = (InnerNode)node;
717 int numOfDatanodes = innerNode.getNumOfLeaves();
718 if (excludedScope == null) {
719 node = null;
720 } else {
721 node = getNode(excludedScope);
722 if (!(node instanceof InnerNode)) {
723 numOfDatanodes -= 1;
724 } else {
725 numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
726 }
727 }
728 if (numOfDatanodes == 0) {
729 throw new InvalidTopologyException(
730 "Failed to find datanode (scope=\"" + String.valueOf(scope) +
731 "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
732 }
733 int leaveIndex = r.nextInt(numOfDatanodes);
734 return innerNode.getLeaf(leaveIndex, node);
735 }
736
737 /** return leaves in <i>scope</i>
738 * @param scope a path string
739 * @return leaves nodes under specific scope
740 */
741 public List<Node> getLeaves(String scope) {
742 Node node = getNode(scope);
743 List<Node> leafNodes = new ArrayList<Node>();
744 if (!(node instanceof InnerNode)) {
745 leafNodes.add(node);
746 } else {
747 InnerNode innerNode = (InnerNode) node;
748 for (int i=0;i<innerNode.getNumOfLeaves();i++) {
749 leafNodes.add(innerNode.getLeaf(i, null));
750 }
751 }
752 return leafNodes;
753 }
754
755 /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
756 * if scope starts with ~, return the number of nodes that are not
757 * in <i>scope</i> and <i>excludedNodes</i>;
758 * @param scope a path string that may start with ~
759 * @param excludedNodes a list of nodes
760 * @return number of available nodes
761 */
762 public int countNumOfAvailableNodes(String scope,
763 Collection<Node> excludedNodes) {
764 boolean isExcluded=false;
765 if (scope.startsWith("~")) {
766 isExcluded=true;
767 scope=scope.substring(1);
768 }
769 scope = NodeBase.normalize(scope);
770 int excludedCountInScope = 0; // the number of nodes in both scope & excludedNodes
771 int excludedCountOffScope = 0; // the number of nodes outside scope & excludedNodes
772 netlock.readLock().lock();
773 try {
774 for (Node node : excludedNodes) {
775 node = getNode(NodeBase.getPath(node));
776 if (node == null) {
777 continue;
778 }
779 if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR)
780 .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) {
781 excludedCountInScope++;
782 } else {
783 excludedCountOffScope++;
784 }
785 }
786 Node n = getNode(scope);
787 int scopeNodeCount = 0;
788 if (n != null) {
789 scopeNodeCount++;
790 }
791 if (n instanceof InnerNode) {
792 scopeNodeCount=((InnerNode)n).getNumOfLeaves();
793 }
794 if (isExcluded) {
795 return clusterMap.getNumOfLeaves() - scopeNodeCount
796 - excludedCountOffScope;
797 } else {
798 return scopeNodeCount - excludedCountInScope;
799 }
800 } finally {
801 netlock.readLock().unlock();
802 }
803 }
804
805 /** convert a network tree to a string */
806 @Override
807 public String toString() {
808 // print the number of racks
809 StringBuilder tree = new StringBuilder();
810 tree.append("Number of racks: ");
811 tree.append(numOfRacks);
812 tree.append("\n");
813 // print the number of leaves
814 int numOfLeaves = getNumOfLeaves();
815 tree.append("Expected number of leaves:");
816 tree.append(numOfLeaves);
817 tree.append("\n");
818 // print nodes
819 for(int i=0; i<numOfLeaves; i++) {
820 tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
821 tree.append("\n");
822 }
823 return tree.toString();
824 }
825
826 /**
827 * Divide networklocation string into two parts by last separator, and get
828 * the first part here.
829 *
830 * @param networkLocation
831 * @return
832 */
833 public static String getFirstHalf(String networkLocation) {
834 int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
835 return networkLocation.substring(0, index);
836 }
837
838 /**
839 * Divide networklocation string into two parts by last separator, and get
840 * the second part here.
841 *
842 * @param networkLocation
843 * @return
844 */
845 public static String getLastHalf(String networkLocation) {
846 int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
847 return networkLocation.substring(index);
848 }
849
850 /**
851 * Returns an integer weight which specifies how far away {node} is away from
852 * {reader}. A lower value signifies that a node is closer.
853 *
854 * @param reader Node where data will be read
855 * @param node Replica of data
856 * @return weight
857 */
858 protected int getWeight(Node reader, Node node) {
859 // 0 is local, 1 is same rack, 2 is off rack
860 // Start off by initializing to off rack
861 int weight = 2;
862 if (reader != null) {
863 if (reader == node) {
864 weight = 0;
865 } else if (isOnSameRack(reader, node)) {
866 weight = 1;
867 }
868 }
869 return weight;
870 }
871
872 /**
873 * Sort nodes array by network distance to <i>reader</i>.
874 * <p/>
875 * In a three-level topology, a node can be either local, on the same rack,
876 * or on a different rack from the reader. Sorting the nodes based on network
877 * distance from the reader reduces network traffic and improves
878 * performance.
879 * <p/>
880 * As an additional twist, we also randomize the nodes at each network
881 * distance. This helps with load balancing when there is data skew.
882 *
883 * @param reader Node where data will be read
884 * @param nodes Available replicas with the requested data
885 * @param activeLen Number of active nodes at the front of the array
886 */
887 public void sortByDistance(Node reader, Node[] nodes, int activeLen) {
888 /** Sort weights for the nodes array */
889 int[] weights = new int[activeLen];
890 for (int i=0; i<activeLen; i++) {
891 weights[i] = getWeight(reader, nodes[i]);
892 }
893 // Add weight/node pairs to a TreeMap to sort
894 TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
895 for (int i=0; i<activeLen; i++) {
896 int weight = weights[i];
897 Node node = nodes[i];
898 List<Node> list = tree.get(weight);
899 if (list == null) {
900 list = Lists.newArrayListWithExpectedSize(1);
901 tree.put(weight, list);
902 }
903 list.add(node);
904 }
905
906 int idx = 0;
907 for (List<Node> list: tree.values()) {
908 if (list != null) {
909 Collections.shuffle(list, r);
910 for (Node n: list) {
911 nodes[idx] = n;
912 idx++;
913 }
914 }
915 }
916 Preconditions.checkState(idx == activeLen,
917 "Sorted the wrong number of nodes!");
918 }
919 }