001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.net;
019    
020    import java.util.ArrayList;
021    import java.util.List;
022    import java.util.Collection;
023    import java.util.Collections;
024    import java.util.List;
025    import java.util.Random;
026    import java.util.TreeMap;
027    import java.util.concurrent.locks.ReadWriteLock;
028    import java.util.concurrent.locks.ReentrantReadWriteLock;
029    
030    import com.google.common.annotations.VisibleForTesting;
031    import org.apache.commons.logging.Log;
032    import org.apache.commons.logging.LogFactory;
033    import org.apache.hadoop.classification.InterfaceAudience;
034    import org.apache.hadoop.classification.InterfaceStability;
035    import org.apache.hadoop.conf.Configuration;
036    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
037    import org.apache.hadoop.util.ReflectionUtils;
038    
039    import com.google.common.base.Preconditions;
040    import com.google.common.collect.Lists;
041    
/** The class represents a cluster of computers with a tree hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 */
051    @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
052    @InterfaceStability.Unstable
053    public class NetworkTopology {
054      public final static String DEFAULT_RACK = "/default-rack";
055      public final static int DEFAULT_HOST_LEVEL = 2;
056      public static final Log LOG = 
057        LogFactory.getLog(NetworkTopology.class);
058        
059      public static class InvalidTopologyException extends RuntimeException {
060        private static final long serialVersionUID = 1L;
061        public InvalidTopologyException(String msg) {
062          super(msg);
063        }
064      }
065      
066      /**
067       * Get an instance of NetworkTopology based on the value of the configuration
068       * parameter net.topology.impl.
069       * 
070       * @param conf the configuration to be used
071       * @return an instance of NetworkTopology
072       */
073      public static NetworkTopology getInstance(Configuration conf){
074        return ReflectionUtils.newInstance(
075            conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
076            NetworkTopology.class, NetworkTopology.class), conf);
077      }
078    
079      /** InnerNode represents a switch/router of a data center or rack.
080       * Different from a leaf node, it has non-null children.
081       */
082      static class InnerNode extends NodeBase {
083        protected List<Node> children=new ArrayList<Node>();
084        private int numOfLeaves;
085            
086        /** Construct an InnerNode from a path-like string */
087        InnerNode(String path) {
088          super(path);
089        }
090            
091        /** Construct an InnerNode from its name and its network location */
092        InnerNode(String name, String location) {
093          super(name, location);
094        }
095            
096        /** Construct an InnerNode
097         * from its name, its network location, its parent, and its level */
098        InnerNode(String name, String location, InnerNode parent, int level) {
099          super(name, location, parent, level);
100        }
101            
102        /** @return its children */
103        List<Node> getChildren() {return children;}
104            
105        /** @return the number of children this node has */
106        int getNumOfChildren() {
107          return children.size();
108        }
109            
110        /** Judge if this node represents a rack 
111         * @return true if it has no child or its children are not InnerNodes
112         */ 
113        boolean isRack() {
114          if (children.isEmpty()) {
115            return true;
116          }
117                
118          Node firstChild = children.get(0);
119          if (firstChild instanceof InnerNode) {
120            return false;
121          }
122                
123          return true;
124        }
125            
126        /** Judge if this node is an ancestor of node <i>n</i>
127         * 
128         * @param n a node
129         * @return true if this node is an ancestor of <i>n</i>
130         */
131        boolean isAncestor(Node n) {
132          return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
133            (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
134            startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
135        }
136            
137        /** Judge if this node is the parent of node <i>n</i>
138         * 
139         * @param n a node
140         * @return true if this node is the parent of <i>n</i>
141         */
142        boolean isParent(Node n) {
143          return n.getNetworkLocation().equals(getPath(this));
144        }
145            
146        /* Return a child name of this node who is an ancestor of node <i>n</i> */
147        private String getNextAncestorName(Node n) {
148          if (!isAncestor(n)) {
149            throw new IllegalArgumentException(
150                                               this + "is not an ancestor of " + n);
151          }
152          String name = n.getNetworkLocation().substring(getPath(this).length());
153          if (name.charAt(0) == PATH_SEPARATOR) {
154            name = name.substring(1);
155          }
156          int index=name.indexOf(PATH_SEPARATOR);
157          if (index !=-1)
158            name = name.substring(0, index);
159          return name;
160        }
161            
162        /** Add node <i>n</i> to the subtree of this node 
163         * @param n node to be added
164         * @return true if the node is added; false otherwise
165         */
166        boolean add(Node n) {
167          if (!isAncestor(n))
168            throw new IllegalArgumentException(n.getName()+", which is located at "
169                    +n.getNetworkLocation()+", is not a decendent of "
170                    +getPath(this));
171          if (isParent(n)) {
172            // this node is the parent of n; add n directly
173            n.setParent(this);
174            n.setLevel(this.level+1);
175            for(int i=0; i<children.size(); i++) {
176              if (children.get(i).getName().equals(n.getName())) {
177                children.set(i, n);
178                return false;
179              }
180            }
181            children.add(n);
182            numOfLeaves++;
183            return true;
184          } else {
185            // find the next ancestor node
186            String parentName = getNextAncestorName(n);
187            InnerNode parentNode = null;
188            for(int i=0; i<children.size(); i++) {
189              if (children.get(i).getName().equals(parentName)) {
190                parentNode = (InnerNode)children.get(i);
191                break;
192              }
193            }
194            if (parentNode == null) {
195              // create a new InnerNode
196              parentNode = createParentNode(parentName);
197              children.add(parentNode);
198            }
199            // add n to the subtree of the next ancestor node
200            if (parentNode.add(n)) {
201              numOfLeaves++;
202              return true;
203            } else {
204              return false;
205            }
206          }
207        }
208    
209        /**
210         * Creates a parent node to be added to the list of children.  
211         * Creates a node using the InnerNode four argument constructor specifying 
212         * the name, location, parent, and level of this node.
213         * 
214         * <p>To be overridden in subclasses for specific InnerNode implementations,
215         * as alternative to overriding the full {@link #add(Node)} method.
216         * 
217         * @param parentName The name of the parent node
218         * @return A new inner node
219         * @see InnerNode#InnerNode(String, String, InnerNode, int)
220         */
221        protected InnerNode createParentNode(String parentName) {
222          return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
223        }
224    
225        /** Remove node <i>n</i> from the subtree of this node
226         * @param n node to be deleted 
227         * @return true if the node is deleted; false otherwise
228         */
229        boolean remove(Node n) {
230          String parent = n.getNetworkLocation();
231          String currentPath = getPath(this);
232          if (!isAncestor(n))
233            throw new IllegalArgumentException(n.getName()
234                                               +", which is located at "
235                                               +parent+", is not a descendent of "+currentPath);
236          if (isParent(n)) {
237            // this node is the parent of n; remove n directly
238            for(int i=0; i<children.size(); i++) {
239              if (children.get(i).getName().equals(n.getName())) {
240                children.remove(i);
241                numOfLeaves--;
242                n.setParent(null);
243                return true;
244              }
245            }
246            return false;
247          } else {
248            // find the next ancestor node: the parent node
249            String parentName = getNextAncestorName(n);
250            InnerNode parentNode = null;
251            int i;
252            for(i=0; i<children.size(); i++) {
253              if (children.get(i).getName().equals(parentName)) {
254                parentNode = (InnerNode)children.get(i);
255                break;
256              }
257            }
258            if (parentNode==null) {
259              return false;
260            }
261            // remove n from the parent node
262            boolean isRemoved = parentNode.remove(n);
263            // if the parent node has no children, remove the parent node too
264            if (isRemoved) {
265              if (parentNode.getNumOfChildren() == 0) {
266                children.remove(i);
267              }
268              numOfLeaves--;
269            }
270            return isRemoved;
271          }
272        } // end of remove
273            
274        /** Given a node's string representation, return a reference to the node
275         * @param loc string location of the form /rack/node
276         * @return null if the node is not found or the childnode is there but
277         * not an instance of {@link InnerNode}
278         */
279        private Node getLoc(String loc) {
280          if (loc == null || loc.length() == 0) return this;
281                
282          String[] path = loc.split(PATH_SEPARATOR_STR, 2);
283          Node childnode = null;
284          for(int i=0; i<children.size(); i++) {
285            if (children.get(i).getName().equals(path[0])) {
286              childnode = children.get(i);
287            }
288          }
289          if (childnode == null) return null; // non-existing node
290          if (path.length == 1) return childnode;
291          if (childnode instanceof InnerNode) {
292            return ((InnerNode)childnode).getLoc(path[1]);
293          } else {
294            return null;
295          }
296        }
297            
298        /** get <i>leafIndex</i> leaf of this subtree 
299         * if it is not in the <i>excludedNode</i>
300         *
301         * @param leafIndex an indexed leaf of the node
302         * @param excludedNode an excluded node (can be null)
303         * @return
304         */
305        Node getLeaf(int leafIndex, Node excludedNode) {
306          int count=0;
307          // check if the excluded node a leaf
308          boolean isLeaf =
309            excludedNode == null || !(excludedNode instanceof InnerNode);
310          // calculate the total number of excluded leaf nodes
311          int numOfExcludedLeaves =
312            isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
313          if (isLeafParent()) { // children are leaves
314            if (isLeaf) { // excluded node is a leaf node
315              int excludedIndex = children.indexOf(excludedNode);
316              if (excludedIndex != -1 && leafIndex >= 0) {
317                // excluded node is one of the children so adjust the leaf index
318                leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
319              }
320            }
321            // range check
322            if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
323              return null;
324            }
325            return children.get(leafIndex);
326          } else {
327            for(int i=0; i<children.size(); i++) {
328              InnerNode child = (InnerNode)children.get(i);
329              if (excludedNode == null || excludedNode != child) {
330                // not the excludedNode
331                int numOfLeaves = child.getNumOfLeaves();
332                if (excludedNode != null && child.isAncestor(excludedNode)) {
333                  numOfLeaves -= numOfExcludedLeaves;
334                }
335                if (count+numOfLeaves > leafIndex) {
336                  // the leaf is in the child subtree
337                  return child.getLeaf(leafIndex-count, excludedNode);
338                } else {
339                  // go to the next child
340                  count = count+numOfLeaves;
341                }
342              } else { // it is the excluededNode
343                // skip it and set the excludedNode to be null
344                excludedNode = null;
345              }
346            }
347            return null;
348          }
349        }
350        
351        protected boolean isLeafParent() {
352          return isRack();
353        }
354    
355        /**
356          * Determine if children a leaves, default implementation calls {@link #isRack()}
357          * <p>To be overridden in subclasses for specific InnerNode implementations,
358          * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
359          * 
360          * @return true if children are leaves, false otherwise
361          */
362        protected boolean areChildrenLeaves() {
363          return isRack();
364        }
365    
366        /**
367         * Get number of leaves.
368         */
369        int getNumOfLeaves() {
370          return numOfLeaves;
371        }
372      } // end of InnerNode
373    
374      /**
375       * the root cluster map
376       */
377      InnerNode clusterMap;
378      /** Depth of all leaf nodes */
379      private int depthOfAllLeaves = -1;
380      /** rack counter */
381      protected int numOfRacks = 0;
382      /** the lock used to manage access */
383      protected ReadWriteLock netlock = new ReentrantReadWriteLock();
384    
385      public NetworkTopology() {
386        clusterMap = new InnerNode(InnerNode.ROOT);
387      }
388    
389      /** Add a leaf node
390       * Update node counter & rack counter if necessary
391       * @param node node to be added; can be null
392       * @exception IllegalArgumentException if add a node to a leave 
393                                             or node to be added is not a leaf
394       */
395      public void add(Node node) {
396        if (node==null) return;
397        String oldTopoStr = this.toString();
398        if( node instanceof InnerNode ) {
399          throw new IllegalArgumentException(
400            "Not allow to add an inner node: "+NodeBase.getPath(node));
401        }
402        int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
403        netlock.writeLock().lock();
404        try {
405          if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
406            LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
407                " at depth " + newDepth + " to topology:\n" + oldTopoStr);
408            throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
409                ": You cannot have a rack and a non-rack node at the same " +
410                "level of the network topology.");
411          }
412          Node rack = getNodeForNetworkLocation(node);
413          if (rack != null && !(rack instanceof InnerNode)) {
414            throw new IllegalArgumentException("Unexpected data node " 
415                                               + node.toString() 
416                                               + " at an illegal network location");
417          }
418          if (clusterMap.add(node)) {
419            LOG.info("Adding a new node: "+NodeBase.getPath(node));
420            if (rack == null) {
421              numOfRacks++;
422            }
423            if (!(node instanceof InnerNode)) {
424              if (depthOfAllLeaves == -1) {
425                depthOfAllLeaves = node.getLevel();
426              }
427            }
428          }
429          if(LOG.isDebugEnabled()) {
430            LOG.debug("NetworkTopology became:\n" + this.toString());
431          }
432        } finally {
433          netlock.writeLock().unlock();
434        }
435      }
436      
437      /**
438       * Return a reference to the node given its string representation.
439       * Default implementation delegates to {@link #getNode(String)}.
440       * 
441       * <p>To be overridden in subclasses for specific NetworkTopology 
442       * implementations, as alternative to overriding the full {@link #add(Node)}
443       *  method.
444       * 
445       * @param node The string representation of this node's network location is
446       * used to retrieve a Node object. 
447       * @return a reference to the node; null if the node is not in the tree
448       * 
449       * @see #add(Node)
450       * @see #getNode(String)
451       */
452      protected Node getNodeForNetworkLocation(Node node) {
453        return getNode(node.getNetworkLocation());
454      }
455      
456      /**
457       * Given a string representation of a rack, return its children
458       * @param loc a path-like string representation of a rack
459       * @return a newly allocated list with all the node's children
460       */
461      public List<Node> getDatanodesInRack(String loc) {
462        netlock.readLock().lock();
463        try {
464          loc = NodeBase.normalize(loc);
465          if (!NodeBase.ROOT.equals(loc)) {
466            loc = loc.substring(1);
467          }
468          InnerNode rack = (InnerNode) clusterMap.getLoc(loc);
469          if (rack == null) {
470            return null;
471          }
472          return new ArrayList<Node>(rack.getChildren());
473        } finally {
474          netlock.readLock().unlock();
475        }
476      }
477    
478      /** Remove a node
479       * Update node counter and rack counter if necessary
480       * @param node node to be removed; can be null
481       */ 
482      public void remove(Node node) {
483        if (node==null) return;
484        if( node instanceof InnerNode ) {
485          throw new IllegalArgumentException(
486            "Not allow to remove an inner node: "+NodeBase.getPath(node));
487        }
488        LOG.info("Removing a node: "+NodeBase.getPath(node));
489        netlock.writeLock().lock();
490        try {
491          if (clusterMap.remove(node)) {
492            InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
493            if (rack == null) {
494              numOfRacks--;
495            }
496          }
497          if(LOG.isDebugEnabled()) {
498            LOG.debug("NetworkTopology became:\n" + this.toString());
499          }
500        } finally {
501          netlock.writeLock().unlock();
502        }
503      }
504    
505      /** Check if the tree contains node <i>node</i>
506       * 
507       * @param node a node
508       * @return true if <i>node</i> is already in the tree; false otherwise
509       */
510      public boolean contains(Node node) {
511        if (node == null) return false;
512        netlock.readLock().lock();
513        try {
514          Node parent = node.getParent();
515          for (int level = node.getLevel(); parent != null && level > 0;
516               parent = parent.getParent(), level--) {
517            if (parent == clusterMap) {
518              return true;
519            }
520          }
521        } finally {
522          netlock.readLock().unlock();
523        }
524        return false; 
525      }
526        
527      /** Given a string representation of a node, return its reference
528       * 
529       * @param loc
530       *          a path-like string representation of a node
531       * @return a reference to the node; null if the node is not in the tree
532       */
533      public Node getNode(String loc) {
534        netlock.readLock().lock();
535        try {
536          loc = NodeBase.normalize(loc);
537          if (!NodeBase.ROOT.equals(loc))
538            loc = loc.substring(1);
539          return clusterMap.getLoc(loc);
540        } finally {
541          netlock.readLock().unlock();
542        }
543      }
544      
545      /** Given a string representation of a rack for a specific network
546       *  location
547       * 
548       * To be overridden in subclasses for specific NetworkTopology 
549       * implementations, as alternative to overriding the full 
550       * {@link #getRack(String)} method.
551       * @param loc
552       *          a path-like string representation of a network location
553       * @return a rack string
554       */
555      public String getRack(String loc) {
556        return loc;
557      }
558      
559      /** @return the total number of racks */
560      public int getNumOfRacks() {
561        netlock.readLock().lock();
562        try {
563          return numOfRacks;
564        } finally {
565          netlock.readLock().unlock();
566        }
567      }
568    
569      /** @return the total number of leaf nodes */
570      public int getNumOfLeaves() {
571        netlock.readLock().lock();
572        try {
573          return clusterMap.getNumOfLeaves();
574        } finally {
575          netlock.readLock().unlock();
576        }
577      }
578    
579      /** Return the distance between two nodes
580       * It is assumed that the distance from one node to its parent is 1
581       * The distance between two nodes is calculated by summing up their distances
582       * to their closest common ancestor.
583       * @param node1 one node
584       * @param node2 another node
585       * @return the distance between node1 and node2 which is zero if they are the same
586       *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
587       */
588      public int getDistance(Node node1, Node node2) {
589        if (node1 == node2) {
590          return 0;
591        }
592        Node n1=node1, n2=node2;
593        int dis = 0;
594        netlock.readLock().lock();
595        try {
596          int level1=node1.getLevel(), level2=node2.getLevel();
597          while(n1!=null && level1>level2) {
598            n1 = n1.getParent();
599            level1--;
600            dis++;
601          }
602          while(n2!=null && level2>level1) {
603            n2 = n2.getParent();
604            level2--;
605            dis++;
606          }
607          while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
608            n1=n1.getParent();
609            n2=n2.getParent();
610            dis+=2;
611          }
612        } finally {
613          netlock.readLock().unlock();
614        }
615        if (n1==null) {
616          LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
617          return Integer.MAX_VALUE;
618        }
619        if (n2==null) {
620          LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
621          return Integer.MAX_VALUE;
622        }
623        return dis+2;
624      }
625    
626      /** Check if two nodes are on the same rack
627       * @param node1 one node (can be null)
628       * @param node2 another node (can be null)
629       * @return true if node1 and node2 are on the same rack; false otherwise
630       * @exception IllegalArgumentException when either node1 or node2 is null, or
631       * node1 or node2 do not belong to the cluster
632       */
633      public boolean isOnSameRack( Node node1,  Node node2) {
634        if (node1 == null || node2 == null) {
635          return false;
636        }
637          
638        netlock.readLock().lock();
639        try {
640          return isSameParents(node1, node2);
641        } finally {
642          netlock.readLock().unlock();
643        }
644      }
645      
646      /**
647       * Check if network topology is aware of NodeGroup
648       */
649      public boolean isNodeGroupAware() {
650        return false;
651      }
652      
653      /** 
654       * Return false directly as not aware of NodeGroup, to be override in sub-class
655       */
656      public boolean isOnSameNodeGroup(Node node1, Node node2) {
657        return false;
658      }
659    
660      /**
661       * Compare the parents of each node for equality
662       * 
663       * <p>To be overridden in subclasses for specific NetworkTopology 
664       * implementations, as alternative to overriding the full 
665       * {@link #isOnSameRack(Node, Node)} method.
666       * 
667       * @param node1 the first node to compare
668       * @param node2 the second node to compare
669       * @return true if their parents are equal, false otherwise
670       * 
671       * @see #isOnSameRack(Node, Node)
672       */
673      protected boolean isSameParents(Node node1, Node node2) {
674        return node1.getParent()==node2.getParent();
675      }
676    
677      private static final Random r = new Random();
678    
679      @VisibleForTesting
680      void setRandomSeed(long seed) {
681        r.setSeed(seed);
682      }
683    
684      /** randomly choose one node from <i>scope</i>
685       * if scope starts with ~, choose one from the all nodes except for the
686       * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
687       * @param scope range of nodes from which a node will be chosen
688       * @return the chosen node
689       */
690      public Node chooseRandom(String scope) {
691        netlock.readLock().lock();
692        try {
693          if (scope.startsWith("~")) {
694            return chooseRandom(NodeBase.ROOT, scope.substring(1));
695          } else {
696            return chooseRandom(scope, null);
697          }
698        } finally {
699          netlock.readLock().unlock();
700        }
701      }
702    
703      private Node chooseRandom(String scope, String excludedScope){
704        if (excludedScope != null) {
705          if (scope.startsWith(excludedScope)) {
706            return null;
707          }
708          if (!excludedScope.startsWith(scope)) {
709            excludedScope = null;
710          }
711        }
712        Node node = getNode(scope);
713        if (!(node instanceof InnerNode)) {
714          return node;
715        }
716        InnerNode innerNode = (InnerNode)node;
717        int numOfDatanodes = innerNode.getNumOfLeaves();
718        if (excludedScope == null) {
719          node = null;
720        } else {
721          node = getNode(excludedScope);
722          if (!(node instanceof InnerNode)) {
723            numOfDatanodes -= 1;
724          } else {
725            numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
726          }
727        }
728        if (numOfDatanodes == 0) {
729          throw new InvalidTopologyException(
730              "Failed to find datanode (scope=\"" + String.valueOf(scope) +
731              "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
732        }
733        int leaveIndex = r.nextInt(numOfDatanodes);
734        return innerNode.getLeaf(leaveIndex, node);
735      }
736    
737      /** return leaves in <i>scope</i>
738       * @param scope a path string
739       * @return leaves nodes under specific scope
740       */
741      public List<Node> getLeaves(String scope) {
742        Node node = getNode(scope);
743        List<Node> leafNodes = new ArrayList<Node>();
744        if (!(node instanceof InnerNode)) {
745          leafNodes.add(node);
746        } else {
747          InnerNode innerNode = (InnerNode) node;
748          for (int i=0;i<innerNode.getNumOfLeaves();i++) {
749            leafNodes.add(innerNode.getLeaf(i, null));
750          }
751        }
752        return leafNodes;
753      }
754    
755      /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
756       * if scope starts with ~, return the number of nodes that are not
757       * in <i>scope</i> and <i>excludedNodes</i>; 
758       * @param scope a path string that may start with ~
759       * @param excludedNodes a list of nodes
760       * @return number of available nodes
761       */
762      public int countNumOfAvailableNodes(String scope,
763                                          Collection<Node> excludedNodes) {
764        boolean isExcluded=false;
765        if (scope.startsWith("~")) {
766          isExcluded=true;
767          scope=scope.substring(1);
768        }
769        scope = NodeBase.normalize(scope);
770        int excludedCountInScope = 0; // the number of nodes in both scope & excludedNodes
771        int excludedCountOffScope = 0; // the number of nodes outside scope & excludedNodes
772        netlock.readLock().lock();
773        try {
774          for (Node node : excludedNodes) {
775            node = getNode(NodeBase.getPath(node));
776            if (node == null) {
777              continue;
778            }
779            if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR)
780                .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) {
781              excludedCountInScope++;
782            } else {
783              excludedCountOffScope++;
784            }
785          }
786          Node n = getNode(scope);
787          int scopeNodeCount = 0;
788          if (n != null) {
789            scopeNodeCount++;
790          }
791          if (n instanceof InnerNode) {
792            scopeNodeCount=((InnerNode)n).getNumOfLeaves();
793          }
794          if (isExcluded) {
795            return clusterMap.getNumOfLeaves() - scopeNodeCount
796                - excludedCountOffScope;
797          } else {
798            return scopeNodeCount - excludedCountInScope;
799          }
800        } finally {
801          netlock.readLock().unlock();
802        }
803      }
804    
805      /** convert a network tree to a string */
806      @Override
807      public String toString() {
808        // print the number of racks
809        StringBuilder tree = new StringBuilder();
810        tree.append("Number of racks: ");
811        tree.append(numOfRacks);
812        tree.append("\n");
813        // print the number of leaves
814        int numOfLeaves = getNumOfLeaves();
815        tree.append("Expected number of leaves:");
816        tree.append(numOfLeaves);
817        tree.append("\n");
818        // print nodes
819        for(int i=0; i<numOfLeaves; i++) {
820          tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
821          tree.append("\n");
822        }
823        return tree.toString();
824      }
825      
826      /**
827       * Divide networklocation string into two parts by last separator, and get 
828       * the first part here.
829       * 
830       * @param networkLocation
831       * @return
832       */
833      public static String getFirstHalf(String networkLocation) {
834        int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
835        return networkLocation.substring(0, index);
836      }
837    
838      /**
839       * Divide networklocation string into two parts by last separator, and get 
840       * the second part here.
841       * 
842       * @param networkLocation
843       * @return
844       */
845      public static String getLastHalf(String networkLocation) {
846        int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
847        return networkLocation.substring(index);
848      }
849    
850      /**
851       * Returns an integer weight which specifies how far away {node} is away from
852       * {reader}. A lower value signifies that a node is closer.
853       * 
854       * @param reader Node where data will be read
855       * @param node Replica of data
856       * @return weight
857       */
858      protected int getWeight(Node reader, Node node) {
859        // 0 is local, 1 is same rack, 2 is off rack
860        // Start off by initializing to off rack
861        int weight = 2;
862        if (reader != null) {
863          if (reader == node) {
864            weight = 0;
865          } else if (isOnSameRack(reader, node)) {
866            weight = 1;
867          }
868        }
869        return weight;
870      }
871    
872      /**
873       * Sort nodes array by network distance to <i>reader</i>.
874       * <p/>
875       * In a three-level topology, a node can be either local, on the same rack,
876       * or on a different rack from the reader. Sorting the nodes based on network
877       * distance from the reader reduces network traffic and improves
878       * performance.
879       * <p/>
880       * As an additional twist, we also randomize the nodes at each network
881       * distance. This helps with load balancing when there is data skew.
882       *
883       * @param reader    Node where data will be read
884       * @param nodes     Available replicas with the requested data
885       * @param activeLen Number of active nodes at the front of the array
886       */
887      public void sortByDistance(Node reader, Node[] nodes, int activeLen) {
888        /** Sort weights for the nodes array */
889        int[] weights = new int[activeLen];
890        for (int i=0; i<activeLen; i++) {
891          weights[i] = getWeight(reader, nodes[i]);
892        }
893        // Add weight/node pairs to a TreeMap to sort
894        TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
895        for (int i=0; i<activeLen; i++) {
896          int weight = weights[i];
897          Node node = nodes[i];
898          List<Node> list = tree.get(weight);
899          if (list == null) {
900            list = Lists.newArrayListWithExpectedSize(1);
901            tree.put(weight, list);
902          }
903          list.add(node);
904        }
905    
906        int idx = 0;
907        for (List<Node> list: tree.values()) {
908          if (list != null) {
909            Collections.shuffle(list, r);
910            for (Node n: list) {
911              nodes[idx] = n;
912              idx++;
913            }
914          }
915        }
916        Preconditions.checkState(idx == activeLen,
917            "Sorted the wrong number of nodes!");
918      }
919    }