/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HAServiceStatus;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Trash;
import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
import static org.apache.hadoop.util.ExitUtil.terminate;
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;

import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.RefreshUserMappingsProtocol;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**********************************************************
 * NameNode serves as both directory namespace manager and
 * "inode table" for the Hadoop DFS.  There is a single NameNode
 * running in any DFS deployment.  (Well, except when there
 * is a second backup/failover NameNode, or when using federated NameNodes.)
 *
 * The NameNode controls two critical tables:
 *   1)  filename->blocksequence (namespace)
 *   2)  block->machinelist ("inodes")
 *
 * The first table is stored on disk and is very precious.
 * The second table is rebuilt every time the NameNode comes up.
 *
 * 'NameNode' refers to both this class as well as the 'NameNode server'.
 * The 'FSNamesystem' class actually performs most of the filesystem
 * management.  The majority of the 'NameNode' class itself is concerned
 * with exposing the IPC interface and the HTTP server to the outside world,
 * plus some configuration management.
 *
 * NameNode implements the
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
 * allows clients to ask for DFS services.
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
 * direct use by authors of DFS client code.  End-users should instead use the
 * {@link org.apache.hadoop.fs.FileSystem} class.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
 * used by DataNodes that actually store DFS data blocks.  These
 * methods are invoked repeatedly and automatically by all the
 * DataNodes in a DFS deployment.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
 * used by secondary namenodes or rebalancing processes to get partial
 * NameNode state, for example partial blocksMap etc.
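 *
 * A minimal client interaction, shown here only for illustration (it
 * assumes {@code fs.defaultFS} points at this NameNode):
 * <pre>{@code
 * Configuration conf = new HdfsConfiguration();
 * FileSystem fs = FileSystem.get(conf);   // RPCs go to the NameNode
 * fs.mkdirs(new Path("/tmp/example"));    // a namespace (WRITE) operation
 * }</pre>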
 **********************************************************/
@InterfaceAudience.Private
public class NameNode {
  static {
    HdfsConfiguration.init();
  }

  /**
   * Categories of operations supported by the namenode.
   */
  public enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }

  /**
   * HDFS configuration can have three types of parameters:
   * <ol>
   * <li>Parameters that are common to all the name services in the cluster.</li>
   * <li>Parameters that are specific to a name service. These keys are suffixed
   * with the nameserviceId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1".</li>
   * <li>Parameters that are specific to a single name node. These keys are suffixed
   * with the nameserviceId and namenodeId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1.namenode1".</li>
   * </ol>
   *
   * The keys below are specific to a given namenode, and thus may be configured
   * without any suffix (globally), with a nameservice suffix, or with a
   * nameservice and namenode suffix. The most specific suffix takes precedence.
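   *
   * For example, with a nameservice {@code ns1} and a namenode {@code nn1}
   * (illustrative names), the RPC address is resolved by consulting, from
   * most to least specific:
   * <pre>
   * dfs.namenode.rpc-address.ns1.nn1
   * dfs.namenode.rpc-address.ns1
   * dfs.namenode.rpc-address
   * </pre>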
   */
  public static final String[] NAMENODE_SPECIFIC_KEYS = {
    DFS_NAMENODE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_NAME_DIR_KEY,
    DFS_NAMENODE_EDITS_DIR_KEY,
    DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
    DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
    DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_BACKUP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_USER_NAME_KEY,
    DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
    DFS_HA_FENCE_METHODS_KEY,
    DFS_HA_ZKFC_PORT_KEY
  };

  /**
   * These keys are specific to a nameservice, but may not be overridden
   * for a specific namenode.
   * @see #NAMENODE_SPECIFIC_KEYS
   */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };

  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | ["
      + StartupOption.CHECKPOINT.getName() + "] | ["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
      + StartupOption.UPGRADE.getName() + "] | ["
      + StartupOption.ROLLBACK.getName() + "] | ["
      + StartupOption.FINALIZE.getName() + "] | ["
      + StartupOption.IMPORT.getName() + "] | ["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
      + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
      + " ] ]";

  public long getProtocolVersion(String protocol,
                                 long clientVersion) throws IOException {
    if (protocol.equals(ClientProtocol.class.getName())) {
      return ClientProtocol.versionID;
    } else if (protocol.equals(DatanodeProtocol.class.getName())) {
      return DatanodeProtocol.versionID;
    } else if (protocol.equals(NamenodeProtocol.class.getName())) {
      return NamenodeProtocol.versionID;
    } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())) {
      return RefreshAuthorizationPolicyProtocol.versionID;
    } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())) {
      return RefreshUserMappingsProtocol.versionID;
    } else if (protocol.equals(GetUserMappingsProtocol.class.getName())) {
      return GetUserMappingsProtocol.versionID;
    } else {
      throw new IOException("Unknown protocol to name node: " + protocol);
    }
  }

  public static final int DEFAULT_PORT = 8020;
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();

  protected FSNamesystem namesystem;
  protected final Configuration conf;
  protected NamenodeRole role;
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected boolean allowStaleStandbyReads;

  /** HTTP server exposing the NameNode web UI. */
  protected NameNodeHttpServer httpServer;
  private Thread emptier;
  /** only used for testing purposes */
  protected boolean stopRequested = false;
  /** Registration information of this name-node */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;

  private NameNodeRpcServer rpcServer;

  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.  **/
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }

  static NameNodeMetrics metrics;
  private static final StartupProgress startupProgress = new StartupProgress();
  static {
    StartupProgressMetrics.register(startupProgress);
  }

  /** @return the {@link FSNamesystem} object. */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }

  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns the object used for reporting namenode startup progress.
   *
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }

  /**
   * Set the configuration property for the service RPC address to the
   * given address.
   */
  public static void setServiceAddress(Configuration conf,
                                       String address) {
    LOG.info("Setting ADDRESS " + address);
    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
  }

  /**
   * Fetches the RPC address that services (datanodes, the backup node, and
   * other non-client callers) should use when connecting to the namenode.
   * If the service RPC address is not explicitly configured, this returns
   * null when {@code fallback} is false, or the default namenode address
   * (shared by clients and services) when it is true.
   */
  public static InetSocketAddress getServiceAddress(Configuration conf,
                                                    boolean fallback) {
    String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
    if (addr == null || addr.isEmpty()) {
      return fallback ? getAddress(conf) : null;
    }
    return getAddress(addr);
  }

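  /** @return the NameNode RPC address embedded in the default filesystem URI. */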
  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }

  /**
   * TODO:FEDERATION
   * @param filesystemURI the filesystem URI to resolve
   * @return the NameNode address embedded in the URI's authority
   */
  public static InetSocketAddress getAddress(URI filesystemURI) {
    String authority = filesystemURI.getAuthority();
    if (authority == null) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s has no authority.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
    }
    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
        filesystemURI.getScheme())) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
          HdfsConstants.HDFS_URI_SCHEME));
    }
    return getAddress(authority);
  }

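  /**
   * Build the HDFS URI for the given NameNode address. The port is omitted
   * when it equals {@link #DEFAULT_PORT}; e.g. a host of "nn.example.com"
   * with port 8020 yields "hdfs://nn.example.com".
   */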
  public static URI getUri(InetSocketAddress namenode) {
    int port = namenode.getPort();
    String portString = port == DEFAULT_PORT ? "" : (":" + port);
    return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
        + namenode.getHostName() + portString);
  }

  //
  // Common NameNode methods implementation for the active name-node role.
  //
  public NamenodeRole getRole() {
    return role;
  }

  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }

  /**
   * Given a configuration, get the address of the service RPC server.
   * Returns null if the service RPC address is not configured.
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    return NameNode.getServiceAddress(conf, false);
  }

  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }

  /** Given a configuration, get the bind host of the service RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getServiceRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /** Given a configuration, get the bind host of the client RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /**
   * Modifies the configuration passed in to contain the service RPC address
   * setting.
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }

  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }

  /** @return the NameNode HTTP address set in the conf. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }

  protected void setHttpServerAddress(Configuration conf) {
    String hostPort = NetUtils.getHostPortString(getHttpAddress());
    conf.set(DFS_NAMENODE_HTTP_ADDRESS_KEY, hostPort);
    LOG.info("Web-server up at: " + hostPort);
  }

  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }

  /* Optimize ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser, which is synchronized.
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }

  /**
   * Login as the configured user for the NameNode.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
  }

  /**
   * Initialize name-node.
   *
   * @param conf the configuration
   */
  protected void initialize(Configuration conf) throws IOException {
    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());

    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
      validateConfigurationSettingsOrAbort(conf);
    }
    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    } else {
      validateConfigurationSettingsOrAbort(conf);
    }

    startCommonServices(conf);
  }

  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }

  /**
   * Verifies that the final configuration settings look sane for the
   * NameNode to start up properly. In particular, the HTTP server port
   * must not equal the RPC server port.
   * @param conf configuration to validate
   * @throws IOException if the configuration is invalid
   */
  protected void validateConfigurationSettings(final Configuration conf)
      throws IOException {
    // check to make sure the web port and rpc port do not match
    if (getHttpServerAddress(conf).getPort()
        == getRpcServerAddress(conf).getPort()) {
      String errMsg = "dfs.namenode.rpc-address " +
          "(" + getRpcServerAddress(conf) + ") and " +
          "dfs.namenode.http-address (" + getHttpServerAddress(conf) + ") " +
          "configuration keys are bound to the same port, unable to start " +
          "NameNode. Port: " + getRpcServerAddress(conf).getPort();
      throw new IOException(errMsg);
    }
  }

  /**
   * Validate NameNode configuration.  Log a fatal error and abort if
   * configuration is invalid.
   *
   * @param conf Configuration to validate
   * @throws IOException thrown if conf is invalid
   */
  private void validateConfigurationSettingsOrAbort(Configuration conf)
      throws IOException {
    try {
      validateConfigurationSettings(conf);
    } catch (IOException e) {
      LOG.fatal(e.toString());
      throw e;
    }
  }

  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p : plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }

  private void stopCommonServices() {
    if (namesystem != null) namesystem.close();
    if (rpcServer != null) rpcServer.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }
    stopHttpServer();
  }

  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }

    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }

  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }

  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
    setHttpServerAddress(conf);
  }

  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }

  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul>
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
   * upgrade and create a snapshot of the current file system state</li>
   * <li>{@link StartupOption#RECOVERY RECOVERY} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
   *            cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
   *            previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via configuration field:
   * <tt>dfs.namenode.startup</tt>
   *
   * The conf will be modified to reflect the actual ports on which
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
   *
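   * For example, starting the daemon with <code>-upgrade</code> on the
   * command line is recorded by {@link #createNameNode} as
   * <tt>dfs.namenode.startup</tt> = <code>UPGRADE</code> before this
   * constructor runs.
   *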
   * @param conf  configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }

  protected NameNode(Configuration conf, NamenodeRole role)
      throws IOException {
    this.conf = conf;
    this.role = role;
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState();
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      state.prepareToEnterState(haContext);
      state.enterState(haContext);
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
  }

  protected HAState createHAState() {
    return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
  }

  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }

  /**
   * Wait for service to finish.
   * (Normally, it runs forever.)
   */
  public void join() {
    try {
      rpcServer.join();
    } catch (InterruptedException ie) {
      LOG.info("Caught interrupted exception ", ie);
    }
  }

  /**
   * Stop all NameNode threads and wait for all to finish.
   */
  public void stop() {
    synchronized (this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
    }
  }

  synchronized boolean isStopRequested() {
    return stopRequested;
  }

  /**
   * Is the cluster currently in safe mode?
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }

  /** get FSImage */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.dir.fsImage;
  }

  /**
   * @return NameNode RPC address
   */
  public InetSocketAddress getNameNodeAddress() {
    return rpcServer.getRpcAddress();
  }

  /**
   * @return NameNode RPC address in "host:port" string form
   */
  public String getNameNodeAddressHostPortString() {
    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
  }

  /**
   * @return NameNode service RPC address if configured, the
   *    NameNode RPC address otherwise
   */
  public InetSocketAddress getServiceRpcAddress() {
    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
  }

  /**
   * @return NameNode HTTP address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpAddress() {
    return httpServer.getHttpAddress();
  }

  /**
   * Verify that the configured directories exist, then format them,
   * interactively confirming the format of each existing directory
   * when running interactively.
   *
   * @param conf configuration to use
   * @param force format even if a directory already contains data
   * @param isInteractive prompt before formatting existing directories
   * @return true if formatting was aborted, false otherwise
   * @throws IOException
   */
  private static boolean format(Configuration conf, boolean force,
      boolean isInteractive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    checkAllowFormat(conf);

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
    }

    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
    List<URI> dirsToPrompt = new ArrayList<URI>();
    dirsToPrompt.addAll(nameDirsToFormat);
    dirsToPrompt.addAll(sharedDirs);
    List<URI> editDirsToFormat =
        FSNamesystem.getNamespaceEditsDirs(conf);

    // if clusterID is not provided - see if you can find the current one
    String clusterId = StartupOption.FORMAT.getClusterId();
    if (clusterId == null || clusterId.isEmpty()) {
      // Generate a new cluster id
      clusterId = NNStorage.newClusterID();
    }
    System.out.println("Formatting using clusterid: " + clusterId);

    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
    FSNamesystem fsn = new FSNamesystem(conf, fsImage);
    fsImage.getEditLog().initJournalsForWrite();

    if (!fsImage.confirmFormat(force, isInteractive)) {
      return true; // aborted
    }

    fsImage.format(fsn, clusterId);
    return false;
  }

  public static void checkAllowFormat(Configuration conf) throws IOException {
    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
                + " is set to false for this filesystem, so it "
                + "cannot be formatted. You will need to set "
                + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY + " parameter "
                + "to true in order to format this filesystem");
    }
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }

  /**
   * Clone the supplied configuration but remove the shared edits dirs.
   *
   * @param conf Supplies the original configuration.
   * @return Cloned configuration without the shared edit dirs.
   * @throws IOException on failure to generate the configuration.
   */
  private static Configuration getConfigurationWithoutSharedEdits(
      Configuration conf)
      throws IOException {
    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
    String editsDirsString = Joiner.on(",").join(editsDirs);

    Configuration confWithoutShared = new Configuration(conf);
    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
        editsDirsString);
    return confWithoutShared;
  }

  /**
   * Format a new shared edits dir and copy in enough edit log segments so that
   * the standby NN can start up.
   *
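   * This is normally invoked from the command line as
   * <code>hdfs namenode -initializeSharedEdits</code>.
   *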
   * @param conf configuration
   * @param force format regardless of whether or not the shared edits dir exists
   * @param interactive prompt the user when a dir exists
   * @return true if the command aborts, false otherwise
   */
  private static boolean initializeSharedEdits(Configuration conf,
      boolean force, boolean interactive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
      LOG.fatal("No shared edits directory configured for namespace " +
          nsId + " namenode " + namenodeId);
      return false;
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
    }

    NNStorage existingStorage = null;
    try {
      FSNamesystem fsns =
          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));

      existingStorage = fsns.getFSImage().getStorage();
      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();

      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);

      FSImage sharedEditsImage = new FSImage(conf,
          Lists.<URI>newArrayList(),
          sharedEditsDirs);
      sharedEditsImage.getEditLog().initJournalsForWrite();

      if (!sharedEditsImage.confirmFormat(force, interactive)) {
        return true; // abort
      }

      NNStorage newSharedStorage = sharedEditsImage.getStorage();
      // Call Storage.format instead of FSImage.format here, since we don't
      // actually want to save a checkpoint - just prime the dirs with
      // the existing namespace info
      newSharedStorage.format(nsInfo);
      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);

      // Need to make sure the edit log segments are in good shape to initialize
      // the shared edits dir.
      fsns.getFSImage().getEditLog().close();
      fsns.getFSImage().getEditLog().initJournalsForWrite();
      fsns.getFSImage().getEditLog().recoverUnclosedStreams();

      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
          conf);
    } catch (IOException ioe) {
      LOG.error("Could not initialize shared edits dir", ioe);
      return true; // aborted
    } finally {
      // Have to unlock storage explicitly for the case when we're running in a
      // unit test, which runs in the same JVM as NNs.
      if (existingStorage != null) {
        try {
          existingStorage.unlockAll();
        } catch (IOException ioe) {
          LOG.warn("Could not unlock storage directories", ioe);
          return true; // aborted
        }
      }
    }
    return false; // did not abort
  }

  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();

    FSEditLog sourceEditLog = fsns.getFSImage().editLog;

    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();

    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }

  private static boolean finalize(Configuration conf,
                               boolean isConfirmationNeeded
                               ) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
    System.err.print(
        "\"finalize\" will remove the previous state of the file system.\n"
        + "The recent upgrade will become permanent.\n"
        + "The rollback option will no longer be available.\n");
    if (isConfirmationNeeded) {
      if (!confirmPrompt("Finalize filesystem state?")) {
        System.err.println("Finalize aborted.");
        return true;
      }
    }
    nsys.dir.fsImage.finalizeUpgrade();
    return false;
  }

  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }

  private static StartupOption parseArguments(String args[]) {
    int argsLen = (args == null) ? 0 : args.length;
    StartupOption startOpt = StartupOption.REGULAR;
    for (int i = 0; i < argsLen; i++) {
      String cmd = args[i];
      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FORMAT;
        for (i = i + 1; i < argsLen; i++) {
          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            i++;
            if (i >= argsLen) {
              // if no cluster id specified, return null
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            String clusterId = args[i];
            // Make sure an id is specified and not another flag
            if (clusterId.isEmpty() ||
                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
                clusterId.equalsIgnoreCase(
                    StartupOption.NONINTERACTIVE.getName())) {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            startOpt.setClusterId(clusterId);
          }

          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
            startOpt.setForceFormat(true);
          }

          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
            startOpt.setInteractiveFormat(false);
          }
        }
      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.GENCLUSTERID;
      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.REGULAR;
      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BACKUP;
      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.CHECKPOINT;
      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.UPGRADE;
        // might be followed by two args
        if (i + 2 < argsLen
            && args[i + 1].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
          i += 2;
          startOpt.setClusterId(args[i]);
        }
      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLBACK;
      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FINALIZE;
      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.IMPORT;
      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BOOTSTRAPSTANDBY;
        return startOpt;
      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.INITIALIZESHAREDEDITS;
        for (i = i + 1; i < argsLen; i++) {
          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
            startOpt.setInteractiveFormat(false);
          } else if (StartupOption.FORCE.getName().equals(args[i])) {
            startOpt.setForceFormat(true);
          } else {
            LOG.fatal("Invalid argument: " + args[i]);
            return null;
          }
        }
        return startOpt;
      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
        if (startOpt != StartupOption.REGULAR) {
          throw new RuntimeException("Can't combine -recover with " +
              "other startup options.");
        }
        startOpt = StartupOption.RECOVER;
        while (++i < argsLen) {
          if (args[i].equalsIgnoreCase(
                StartupOption.FORCE.getName())) {
            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
          } else {
            throw new RuntimeException("Error parsing recovery options: " +
              "can't understand option \"" + args[i] + "\"");
          }
        }
      } else {
        return null;
      }
    }
    return startOpt;
  }

  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.toString());
  }

  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
                                          StartupOption.REGULAR.toString()));
  }

  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.saveNamespace();
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }

  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) &&
        (startOpt == StartupOption.UPGRADE ||
         startOpt == StartupOption.ROLLBACK ||
         startOpt == StartupOption.FINALIZE)) {
      throw new HadoopIllegalArgumentException("Invalid startup option. " +
          "Cannot perform DFS upgrade with HA enabled.");
    }

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        boolean aborted = finalize(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case BOOTSTRAPSTANDBY: {
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }

  /**
   * In a federated setup, the configuration for a set of namenodes and
   * secondary namenode/backup/checkpointer nodes is grouped under a logical
   * nameservice ID, and the configuration keys specific to them carry the
   * configured nameserviceId as a suffix.
   *
   * This method copies the value from the specific key of the form
   * key.nameserviceId to the plain key, to set up the generic configuration.
   * Once this is done, only the generic version of the configuration is read
   * in the rest of the code, for backward compatibility and simpler code
   * changes.
   *
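   * For example, with {@code nameserviceId="ns1"} and {@code namenodeId="nn1"}
   * (illustrative values), the value of
   * {@code dfs.namenode.rpc-address.ns1.nn1} is copied to
   * {@code dfs.namenode.rpc-address}.
   *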
   * @param conf
   *          Configuration object to look the specific key up in and to set
   *          the generic key on. Note that the conf object is modified.
   * @param nameserviceId name service Id (to distinguish federated NNs)
   * @param namenodeId the namenode ID (to distinguish HA NNs)
   * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
   */
  public static void initializeGenericKeys(Configuration conf,
      String nameserviceId, String namenodeId) {
    if ((nameserviceId != null && !nameserviceId.isEmpty()) ||
        (namenodeId != null && !namenodeId.isEmpty())) {
      if (nameserviceId != null) {
        conf.set(DFS_NAMESERVICE_ID, nameserviceId);
      }
      if (namenodeId != null) {
        conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
      }

      DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
          NAMENODE_SPECIFIC_KEYS);
      DFSUtil.setGenericConf(conf, nameserviceId, null,
          NAMESERVICE_SPECIFIC_KEYS);
    }

    // If the RPC address is set, use it to (re-)configure the default FS
    if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
      URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
          + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
      conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
    }
  }
1302        
  /**
   * Get the name service Id for the node.
   * @return name service Id or null if federation is not configured
   */
  protected String getNameServiceId(Configuration conf) {
    return DFSUtil.getNamenodeNameServiceId(conf);
  }

  /**
   * Entry point for the NameNode. Parses the command line for a startup
   * option, runs one-shot administrative actions (such as formatting the
   * name directories) directly, or otherwise starts the NameNode daemon
   * and waits for its RPC server to stop.
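   *
   * <p>Illustrative invocations via the hdfs launcher script:
   * <pre>
   *   hdfs namenode            # start the NameNode daemon
   *   hdfs namenode -format    # format the filesystem and exit
   * </pre>
   */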
  public static void main(String[] argv) throws Exception {
    if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
      System.exit(0);
    }

    try {
      StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
      NameNode namenode = createNameNode(argv, null);
      if (namenode != null) {
        namenode.join();
      }
    } catch (Throwable e) {
      LOG.fatal("Exception in namenode join", e);
      terminate(1, e);
    }
  }

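  /**
   * Check that this NameNode is healthy enough to keep serving; this backs
   * the HAServiceProtocol health check, typically driven by a failover
   * controller when automatic HA is enabled. A no-op unless HA is enabled.
   *
   * @throws HealthCheckFailedException if the NameNode has no storage
   *           resources available
   * @throws AccessControlException if the caller is not a superuser
   */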
  synchronized void monitorHealth()
      throws HealthCheckFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      return; // no-op, if HA is not enabled
    }
    getNamesystem().checkAvailableResources();
    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
      throw new HealthCheckFailedException(
          "The NameNode has no resources available");
    }
  }

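  /**
   * Transition this NameNode into the active HA state.
   *
   * @throws ServiceFailedException if HA is not enabled for this NameNode
   * @throws AccessControlException if the caller is not a superuser
   */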
  synchronized void transitionToActive()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }

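  /**
   * Transition this NameNode into the standby HA state.
   *
   * @throws ServiceFailedException if HA is not enabled for this NameNode
   * @throws AccessControlException if the caller is not a superuser
   */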
  synchronized void transitionToStandby()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }

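  /**
   * Report the HA status of this NameNode: its current HA state and whether
   * it is ready to become active. A standby that is still in safemode is
   * reported as not yet ready.
   *
   * @throws ServiceFailedException if HA is not enabled for this NameNode
   * @throws AccessControlException if the caller is not a superuser
   */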
  synchronized HAServiceStatus getServiceStatus()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    if (state == null) {
      return new HAServiceStatus(HAServiceState.INITIALIZING);
    }
    HAServiceState retState = state.getServiceState();
    HAServiceStatus ret = new HAServiceStatus(retState);
    if (retState == HAServiceState.STANDBY) {
      String safemodeTip = namesystem.getSafeModeTip();
      if (!safemodeTip.isEmpty()) {
        ret.setNotReadyToBecomeActive(
            "The NameNode is in safemode. " + safemodeTip);
      } else {
        ret.setReadyToBecomeActive();
      }
    } else if (retState == HAServiceState.ACTIVE) {
      ret.setReadyToBecomeActive();
    } else {
      ret.setNotReadyToBecomeActive("State is " + state);
    }
    return ret;
  }

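  /**
   * @return the current HA service state, or INITIALIZING if the HA state
   *         has not been set up yet
   */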
  synchronized HAServiceState getServiceState() {
    if (state == null) {
      return HAServiceState.INITIALIZING;
    }
    return state.getServiceState();
  }

  /**
   * Shut down the NN immediately in an ungraceful way. Used when it would be
   * unsafe for the NN to continue operating, e.g. during a failed HA state
   * transition.
   *
   * @param t exception which warrants the shutdown. Printed to the NN log
   *          before exit.
   * @throws ExitException thrown only for testing.
   */
  protected synchronized void doImmediateShutdown(Throwable t)
      throws ExitException {
    String message = "Error encountered requiring NN shutdown. " +
        "Shutting down immediately.";
    try {
      LOG.fatal(message, t);
    } catch (Throwable ignored) {
      // This is unlikely to happen, but there's nothing we can do if it does.
    }
    terminate(1, t);
  }

  /**
   * Class used to expose {@link NameNode} as context to {@link HAState}.
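   * The active and standby HAState implementations call back into this
   * context to start and stop the appropriate services when the node
   * transitions between HA states.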
   */
  protected class NameNodeHAContext implements HAContext {
    @Override
    public void setState(HAState s) {
      state = s;
    }

    @Override
    public HAState getState() {
      return state;
    }

    @Override
    public void startActiveServices() throws IOException {
      try {
        namesystem.startActiveServices();
        startTrashEmptier(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopActiveServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopActiveServices();
        }
        stopTrashEmptier();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void startStandbyServices() throws IOException {
      try {
        namesystem.startStandbyServices(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void prepareToStopStandbyServices() throws ServiceFailedException {
      try {
        namesystem.prepareToStopStandbyServices();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopStandbyServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopStandbyServices();
        }
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void writeLock() {
      namesystem.writeLock();
    }

    @Override
    public void writeUnlock() {
      namesystem.writeUnlock();
    }

    /** Check whether an operation of the given category is allowed. */
    @Override
    public void checkOperation(final OperationCategory op)
        throws StandbyException {
      state.checkOperation(haContext, op);
    }

    @Override
    public boolean allowStaleReads() {
      return allowStaleStandbyReads;
    }

  }

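  /** @return true if this NameNode is currently in the standby HA state */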
  public boolean isStandbyState() {
    // Null-safe comparison: the HA state may not be initialized yet.
    return STANDBY_STATE.equals(state);
  }

  /**
   * Check that a request to change this node's HA state is valid.
   * In particular, verifies that, if auto failover is enabled, non-forced
   * requests from the HAAdmin CLI are rejected, and vice versa.
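   *
   * <p>Summarizing the checks below:
   * <pre>
   *   request source          auto HA enabled   auto HA disabled
   *   REQUEST_BY_USER         rejected          allowed
   *   REQUEST_BY_USER_FORCED  allowed (warns)   allowed
   *   REQUEST_BY_ZKFC         allowed           rejected
   * </pre>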
   *
   * @param req the request to check
   * @throws AccessControlException if the request is disallowed
   */
  void checkHaStateChange(StateChangeRequestInfo req)
      throws AccessControlException {
    boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
        DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
    switch (req.getSource()) {
    case REQUEST_BY_USER:
      if (autoHaEnabled) {
        throw new AccessControlException(
            "Manual HA control for this NameNode is disallowed, because " +
            "automatic HA is enabled.");
      }
      break;
    case REQUEST_BY_USER_FORCED:
      if (autoHaEnabled) {
        LOG.warn("Allowing manual HA control from " +
            Server.getRemoteAddress() +
            " even though automatic HA is enabled, because the user " +
            "specified the force flag");
      }
      break;
    case REQUEST_BY_ZKFC:
      if (!autoHaEnabled) {
        throw new AccessControlException(
            "Request from ZK failover controller at " +
            Server.getRemoteAddress() + " denied since automatic HA " +
            "is not enabled");
      }
      break;
    }
  }
}