/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.util.HostsFileReader;

/**
 * This class manages the include and exclude files for HDFS.
 *
 * These files control which DataNodes the NameNode expects to see in the
 * cluster.  Loosely speaking, the include file, if it exists and is not
 * empty, is a list of everything we expect to see.  The exclude file is
 * a list of everything we want to ignore if we do see it.
 *
 * Entries may or may not specify a port.  If they don't, we consider
 * them to apply to every DataNode on that host.  For example, putting
 * 192.168.0.100 in the excludes file blacklists both 192.168.0.100:5000 and
 * 192.168.0.100:6000.  This case comes up in unit tests.
 *
 * When reading the hosts files, we try to find the IP address for each
 * entry.  This is important because it allows us to de-duplicate entries.
 * If the user specifies a node as foo.bar.com in the include file, but
 * 192.168.0.100 in the exclude file, we need to realize that these are
 * the same node.  Resolving the IP address also allows us to give more
 * information back to getDatanodeListForReport, which makes the web UI
 * look nicer (among other things).  See HDFS-3934 for more details.
 *
 * DNS resolution can be slow.  For this reason, we ONLY do it when (re)reading
 * the hosts files.  In all other cases, we rely on the cached values either
 * in the DatanodeID objects, or in HostFileManager#Entry.
 * We also don't want to be holding locks when doing this.
 * See HDFS-3990 for more discussion of DNS overheads.
 *
 * Not all entries in the hosts files will have an associated IP address.
 * Some entries may be "registration names."  The "registration name" of
 * a DataNode is either its actual hostname or an arbitrary string configured
 * by dfs.datanode.hostname.  It is possible to add registration names to the
 * include or exclude files.  If we can't find an IP address associated with
 * a host file entry, we assume it is a registration name and act accordingly.
 * The "registration name" feature is a little odd and may be removed in the
 * future.
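 *
 * A rough usage sketch (illustrative only; the file paths and the datanodeID
 * variable are hypothetical):
 * <pre>
 *   HostFileManager hostFileManager = new HostFileManager();
 *   hostFileManager.refresh("/etc/hadoop/dfs.include", "/etc/hadoop/dfs.exclude");
 *   boolean included = hostFileManager.isIncluded(datanodeID);
 *   boolean excluded = hostFileManager.isExcluded(datanodeID);
 * </pre>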
 */
public class HostFileManager {
  private static final Log LOG = LogFactory.getLog(HostFileManager.class);

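  /**
   * A single entry from an include or exclude file: the text the user wrote
   * (the prefix), the port, if one was given, and the IP address, if we
   * could resolve one.
   */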
  public static class Entry {
    /**
     * This is what the user put on the line before the colon, or the whole
     * line if there is no colon.
     */
    private final String prefix;

    /**
     * This is the port which was specified after the colon.  It is 0 if no
     * port was given.
     */
    private final int port;

    /**
     * If we can resolve the IP address, this is it.  Otherwise, it is the
     * empty string.
     */
    private final String ipAddress;

    /**
     * Parse a hosts file Entry.
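     *
     * For illustration (hypothetical inputs; the resolved address depends on
     * the local DNS configuration):
     * <pre>
     *   parse("dfs.exclude", "192.168.0.100:5000")  // prefix "192.168.0.100", port 5000
     *   parse("dfs.exclude", "host1.example.com")   // port 0, meaning "any port"
     * </pre>
     *
     * @param fileName  the name of the hosts file, used only in log messages
     * @param entry     a single entry (host or host:port) from the hosts file
     * @return          the parsed Entry
     * @throws IOException  if the port cannot be parsed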
     */
    static Entry parse(String fileName, String entry) throws IOException {
      final String prefix;
      final int port;
      String ipAddress = "";

      int idx = entry.indexOf(':');
      if (-1 == idx) {
        prefix = entry;
        port = 0;
      } else {
        prefix = entry.substring(0, idx);
        String portStr = entry.substring(idx + 1);
        try {
          port = Integer.parseInt(portStr);
        } catch (NumberFormatException e) {
          throw new IOException("unable to parse port number for " +
              "'" + entry + "'", e);
        }
      }
      try {
        // Let's see if we can resolve this prefix to an IP address.
        // This may fail; one example is with a registered hostname
        // which is not actually a real DNS name.
        InetAddress addr = InetAddress.getByName(prefix);
        ipAddress = addr.getHostAddress();
      } catch (UnknownHostException e) {
        LOG.info("When reading " + fileName + ", could not look up " +
            "IP address for " + prefix + ".  We will assume this is a " +
            "registration name.", e);
      }
      return new Entry(prefix, port, ipAddress);
    }

    public String getIdentifier() {
      return ipAddress.isEmpty() ? prefix : ipAddress;
    }

    public Entry(String prefix, int port, String ipAddress) {
      this.prefix = prefix;
      this.port = port;
      this.ipAddress = ipAddress;
    }

    public String getPrefix() {
      return prefix;
    }

    public int getPort() {
      return port;
    }

    public String getIpAddress() {
      return ipAddress;
    }

    @Override
    public String toString() {
      StringBuilder bld = new StringBuilder();
      bld.append("Entry{").append(prefix).append(", port=").
          append(port).append(", ipAddress=").append(ipAddress).append("}");
      return bld.toString();
    }
  }

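  /**
   * A set of hosts file entries, indexed so that a DataNode can be looked up
   * by IP address, by registration hostname, and by either of those combined
   * with a transfer port.  This base class only exposes read operations; see
   * MutableEntrySet for the variant that can be populated.
   */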
  public static class EntrySet implements Iterable<Entry> {
    /**
     * The index.  Each Entry appears in here exactly once.
     *
     * It may be indexed by one of:
     *     ipAddress:port
     *     ipAddress
     *     registeredHostname:port
     *     registeredHostname
     *
     * The different indexing strategies reflect the fact that we may or may
     * not have a port or IP address for each entry.
     */
    TreeMap<String, Entry> index = new TreeMap<String, Entry>();

    public boolean isEmpty() {
      return index.isEmpty();
    }

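    /**
     * Find the entry, if any, that matches the given DatanodeID.  We check
     * the most specific key first (ipAddress:port), then fall back to
     * ipAddress, registeredHostname:port, and finally registeredHostname.
     *
     * @param datanodeID  the DatanodeID to look up
     * @return            the matching Entry, or null if there is none
     */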
    public Entry find(DatanodeID datanodeID) {
      Entry entry;
      int xferPort = datanodeID.getXferPort();
      assert(xferPort > 0);
      String datanodeIpAddr = datanodeID.getIpAddr();
      if (datanodeIpAddr != null) {
        entry = index.get(datanodeIpAddr + ":" + xferPort);
        if (entry != null) {
          return entry;
        }
        entry = index.get(datanodeIpAddr);
        if (entry != null) {
          return entry;
        }
      }
      String registeredHostName = datanodeID.getHostName();
      if (registeredHostName != null) {
        entry = index.get(registeredHostName + ":" + xferPort);
        if (entry != null) {
          return entry;
        }
        entry = index.get(registeredHostName);
        if (entry != null) {
          return entry;
        }
      }
      return null;
    }

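    /**
     * Find the entry, if any, that matches another Entry.  If toFind
     * specifies a port, only an exact identifier:port match counts; if it
     * does not, it matches any entry with the same identifier, regardless
     * of port.
     *
     * @param toFind  the Entry to look up
     * @return        the matching Entry, or null if there is none
     */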
    public Entry find(Entry toFind) {
      int port = toFind.getPort();
      if (port != 0) {
        return index.get(toFind.getIdentifier() + ":" + port);
      } else {
        // An Entry with no port matches any entry with the same identifier.
        // In other words, we treat 0 as "any port."
        String identifier = toFind.getIdentifier();
        Entry entry = index.get(identifier);
        if (entry != null) {
          return entry;
        }
        // There is no port-less entry for this identifier.  Fall back to the
        // first identifier:port key; all such keys sort together in the
        // TreeMap, at or after "identifier:".
        Map.Entry<String, Entry> ceil = index.ceilingEntry(identifier + ":");
        if ((ceil != null) &&
            (ceil.getValue().getIdentifier().equals(identifier))) {
          return ceil.getValue();
        }
        return null;
      }
    }

    @Override
    public String toString() {
      StringBuilder bld = new StringBuilder();

      bld.append("HostSet(");
      for (Map.Entry<String, Entry> entry : index.entrySet()) {
        bld.append("\n\t");
        bld.append(entry.getKey()).append("->").
            append(entry.getValue().toString());
      }
      bld.append("\n)");
      return bld.toString();
    }

    @Override
    public Iterator<Entry> iterator() {
      return index.values().iterator();
    }
  }

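  /**
   * An EntrySet that supports adding entries, either one at a time or by
   * reading a hosts file.
   */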
  public static class MutableEntrySet extends EntrySet {
    public void add(DatanodeID datanodeID) {
      Entry entry = new Entry(datanodeID.getHostName(),
          datanodeID.getXferPort(), datanodeID.getIpAddr());
      index.put(datanodeID.getIpAddr() + ":" + datanodeID.getXferPort(),
          entry);
    }

    public void add(Entry entry) {
      int port = entry.getPort();
      if (port != 0) {
        index.put(entry.getIdentifier() + ":" + port, entry);
      } else {
        index.put(entry.getIdentifier(), entry);
      }
    }

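    /**
     * Read a hosts file and add an Entry for each entry in it.  An empty
     * filename means "no file configured" and is silently ignored.
     *
     * @param type      a human-readable description of the file, such as
     *                  "included" or "excluded"
     * @param filename  the path of the hosts file, or "" if there is none
     * @throws IOException  if the file cannot be read or an entry cannot be
     *                      parsed
     */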
    void readFile(String type, String filename) throws IOException {
      if (filename.isEmpty()) {
        return;
      }
      HashSet<String> entrySet = new HashSet<String>();
      HostsFileReader.readFileToSet(type, filename, entrySet);
      for (String str : entrySet) {
        Entry entry = Entry.parse(filename, str);
        add(entry);
      }
    }
  }

  private EntrySet includes = new EntrySet();
  private EntrySet excludes = new EntrySet();

  public HostFileManager() {
  }

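  /**
   * Re-read the include and exclude files.  Each file is parsed into a fresh
   * set before any state is replaced, so a file that fails to load leaves the
   * previously loaded entries in effect.  If either file could not be read,
   * an IOException is thrown after both reads have been attempted.
   *
   * @param includeFile  the path of the include file, or "" if there is none
   * @param excludeFile  the path of the exclude file, or "" if there is none
   * @throws IOException  if either file could not be read or parsed
   */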
  public void refresh(String includeFile, String excludeFile)
      throws IOException {
    MutableEntrySet newIncludes = new MutableEntrySet();
    IOException includeException = null;
    try {
      newIncludes.readFile("included", includeFile);
    } catch (IOException e) {
      includeException = e;
    }
    MutableEntrySet newExcludes = new MutableEntrySet();
    IOException excludeException = null;
    try {
      newExcludes.readFile("excluded", excludeFile);
    } catch (IOException e) {
      excludeException = e;
    }
    synchronized(this) {
      if (includeException == null) {
        includes = newIncludes;
      }
      if (excludeException == null) {
        excludes = newExcludes;
      }
    }
    if (includeException == null) {
      LOG.info("read includes:\n" + newIncludes);
    } else {
      LOG.error("failed to read include file '" + includeFile + "'. " +
          "Continuing to use previous include list.",
          includeException);
    }
    if (excludeException == null) {
      LOG.info("read excludes:\n" + newExcludes);
    } else {
      LOG.error("failed to read exclude file '" + excludeFile + "'. " +
          "Continuing to use previous exclude list.",
          excludeException);
    }
    if (includeException != null) {
      throw new IOException("error reading hosts file " + includeFile,
          includeException);
    }
    if (excludeException != null) {
      throw new IOException("error reading exclude file " + excludeFile,
          excludeException);
    }
  }

  public synchronized boolean isIncluded(DatanodeID dn) {
    if (includes.isEmpty()) {
      // If the includes list is empty, act as if everything is in the
      // includes list.
      return true;
    } else {
      return includes.find(dn) != null;
    }
  }

  public synchronized boolean isExcluded(DatanodeID dn) {
    return excludes.find(dn) != null;
  }

  public synchronized boolean hasIncludes() {
    return !includes.isEmpty();
  }

  /**
   * @return          the current set of includes.  Callers must not modify
   *                  the returned set.
   */
  public synchronized EntrySet getIncludes() {
    return includes;
  }

  /**
   * @return          the current set of excludes.  Callers must not modify
   *                  the returned set.
   */
  public synchronized EntrySet getExcludes() {
    return excludes;
  }
}