/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import javax.management.ObjectName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HAServiceStatus;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Trash;

import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
import static org.apache.hadoop.util.ExitUtil.terminate;
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;

import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.RefreshUserMappingsProtocol;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**********************************************************
 * NameNode serves as both directory namespace manager and
 * "inode table" for the Hadoop DFS.  There is a single NameNode
 * running in any DFS deployment.  (Well, except when there
 * is a second backup/failover NameNode, or when using federated NameNodes.)
 *
 * The NameNode controls two critical tables:
 *   1)  filename->blocksequence (namespace)
 *   2)  block->machinelist ("inodes")
 *
 * The first table is stored on disk and is very precious.
 * The second table is rebuilt every time the NameNode comes up.
 *
 * 'NameNode' refers to both this class and the 'NameNode server'.
 * The 'FSNamesystem' class actually performs most of the filesystem
 * management.  The majority of the 'NameNode' class itself is concerned
 * with exposing the IPC interface and the HTTP server to the outside world,
 * plus some configuration management.
 *
 * NameNode implements the
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
 * allows clients to ask for DFS services.
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
 * direct use by authors of DFS client code.  End-users should instead use the
 * {@link org.apache.hadoop.fs.FileSystem} class.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
 * used by DataNodes that actually store DFS data blocks.  These
 * methods are invoked repeatedly and automatically by all the
 * DataNodes in a DFS deployment.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
 * used by secondary namenodes or rebalancing processes to get partial
 * NameNode state, for example partial blocksMap etc.
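 *
 * As a minimal sketch (assuming {@code fs.defaultFS} points at this
 * NameNode), an end-user client reaches the NameNode through the
 * FileSystem API rather than through ClientProtocol directly:
 * <pre>{@code
 * Configuration conf = new HdfsConfiguration();
 * // resolves fs.defaultFS and issues RPCs to the NameNode under the hood
 * FileSystem fs = FileSystem.get(conf);
 * boolean exists = fs.exists(new Path("/user"));
 * }</pre>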
 **********************************************************/
@InterfaceAudience.Private
public class NameNode implements NameNodeStatusMXBean {
  static{
    HdfsConfiguration.init();
  }

  /**
   * Categories of operations supported by the namenode.
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }

  /**
   * HDFS configuration can have three types of parameters:
   * <ol>
   * <li>Parameters that are common for all the name services in the cluster.</li>
   * <li>Parameters that are specific to a name service. These keys are suffixed
   * with nameserviceId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1".</li>
   * <li>Parameters that are specific to a single name node. These keys are suffixed
   * with nameserviceId and namenodeId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1.namenode1".</li>
   * </ol>
   * 
   * The keys listed below are specific to a given namenode, and thus may be
   * configured globally, for a nameservice, or for a specific namenode within
   * a nameservice; the most specific suffix takes precedence.
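   * 
   * <p>For example, the RPC address of namenode "namenode1" in nameservice
   * "nameservice1" may be set at any of these three levels, with the most
   * specific setting winning:
   * <pre>
   * dfs.namenode.rpc-address                          (global)
   * dfs.namenode.rpc-address.nameservice1             (per nameservice)
   * dfs.namenode.rpc-address.nameservice1.namenode1   (per namenode)
   * </pre>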
   */
  public static final String[] NAMENODE_SPECIFIC_KEYS = {
    DFS_NAMENODE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_NAME_DIR_KEY,
    DFS_NAMENODE_EDITS_DIR_KEY,
    DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
    DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
    DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_BACKUP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_USER_NAME_KEY,
    DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
    DFS_HA_FENCE_METHODS_KEY,
    DFS_HA_ZKFC_PORT_KEY
  };

  /**
   * These keys are specific to a nameservice, but may not be overridden
   * for a specific namenode.
   * @see #NAMENODE_SPECIFIC_KEYS
   */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };

  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | ["
      + StartupOption.CHECKPOINT.getName() + "] | ["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
      + StartupOption.UPGRADE.getName() + "] | ["
      + StartupOption.ROLLBACK.getName() + "] | ["
      + StartupOption.FINALIZE.getName() + "] | ["
      + StartupOption.IMPORT.getName() + "] | ["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
      + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
      + " ] ]";

  public long getProtocolVersion(String protocol,
                                 long clientVersion) throws IOException {
    if (protocol.equals(ClientProtocol.class.getName())) {
      return ClientProtocol.versionID;
    } else if (protocol.equals(DatanodeProtocol.class.getName())){
      return DatanodeProtocol.versionID;
    } else if (protocol.equals(NamenodeProtocol.class.getName())){
      return NamenodeProtocol.versionID;
    } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
      return RefreshAuthorizationPolicyProtocol.versionID;
    } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
      return RefreshUserMappingsProtocol.versionID;
    } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
      return GetUserMappingsProtocol.versionID;
    } else {
      throw new IOException("Unknown protocol to name node: " + protocol);
    }
  }

  public static final int DEFAULT_PORT = 8020;
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();

  protected FSNamesystem namesystem;
  protected final Configuration conf;
  protected NamenodeRole role;
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected boolean allowStaleStandbyReads;

  /** HTTP server for the NameNode web UI */
  protected NameNodeHttpServer httpServer;
  private Thread emptier;
  /** only used for testing purposes  */
  protected boolean stopRequested = false;
  /** Registration information of this name-node  */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;

  private NameNodeRpcServer rpcServer;

  private JvmPauseMonitor pauseMonitor;
  private ObjectName nameNodeStatusBeanName;

  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.  **/
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }

  static NameNodeMetrics metrics;
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }

  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   * 
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

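  /**
   * Create a socket address from a "host[:port]" string, using
   * {@link #DEFAULT_PORT} when the port is omitted.
   */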
  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }

  /**
   * Set the configuration property for the service RPC address
   * to the given address.
   */
  public static void setServiceAddress(Configuration conf,
                                           String address) {
    LOG.info("Setting ADDRESS " + address);
    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
  }

  /**
   * Fetches the address that services should use when connecting to the
   * namenode. Returns the service RPC address if it is configured; otherwise
   * returns null if {@code fallback} is false, or the default namenode
   * address (used by both clients and services) if {@code fallback} is true.
   * Services here are datanodes, the backup node, and any other non-client
   * connections.
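   *
   * <p>A minimal sketch of the resolution order (using a hypothetical host
   * {@code nn.example.com}):
   * <pre>{@code
   * Configuration conf = new HdfsConfiguration();
   * conf.set(DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
   *     "nn.example.com:8021");
   * // The service RPC key is set, so this returns nn.example.com:8021;
   * // with the key unset it would fall back to the default namenode address.
   * InetSocketAddress addr = NameNode.getServiceAddress(conf, true);
   * }</pre>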
   */
  public static InetSocketAddress getServiceAddress(Configuration conf,
                                                        boolean fallback) {
    String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
    if (addr == null || addr.isEmpty()) {
      return fallback ? getAddress(conf) : null;
    }
    return getAddress(addr);
  }

  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }

  /**
   * TODO:FEDERATION
   * @param filesystemURI the file system URI; must use the hdfs scheme and
   *          include an authority (host and optional port)
   * @return the NameNode RPC address derived from the URI's authority
   */
  public static InetSocketAddress getAddress(URI filesystemURI) {
    String authority = filesystemURI.getAuthority();
    if (authority == null) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s has no authority.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
    }
    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
        filesystemURI.getScheme())) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
          HdfsConstants.HDFS_URI_SCHEME));
    }
    return getAddress(authority);
  }

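  /**
   * Build an hdfs:// URI for the given NameNode address, omitting the port
   * when it equals {@link #DEFAULT_PORT}.
   */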
  public static URI getUri(InetSocketAddress namenode) {
    int port = namenode.getPort();
    String portString = port == DEFAULT_PORT ? "" : (":"+port);
    return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
        + namenode.getHostName()+portString);
  }

  //
  // Common NameNode methods implementation for the active name-node role.
  //
  public NamenodeRole getRole() {
    return role;
  }

  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }

  /**
   * Given a configuration, get the address of the service RPC server.
   * Returns null if the service RPC address is not configured.
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    return NameNode.getServiceAddress(conf, false);
  }

  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }

  /** Given a configuration, get the bind host of the service RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getServiceRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /** Given a configuration, get the bind host of the client RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /**
   * Modifies the passed configuration to contain the service RPC address setting.
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }

  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }

  /** @return the NameNode HTTP address. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return  NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }

  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }

  /* Optimize ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser, which is synchronized.
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }

  /**
   * Login as the configured user for the NameNode.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
  }

  /**
   * Initialize name-node.
   * 
   * @param conf the configuration
   */
  protected void initialize(Configuration conf) throws IOException {
    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
      if (intervals != null) {
        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
          intervals);
      }
    }

    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());
    StartupProgressMetrics.register(startupProgress);

    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
    }
    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }

    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();

    startCommonServices(conf);
  }

  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }

  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }

  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }
    stopHttpServer();
  }

  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }

    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }

  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }

  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }

  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }

  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul>
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
   * upgrade and create a snapshot of the current file system state</li>
   * <li>{@link StartupOption#RECOVER RECOVER} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
   *            cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
   *            previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via configuration field:
   * <tt>dfs.namenode.startup</tt>
   * 
   * The conf will be modified to reflect the actual ports on which
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
   * 
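   * As a minimal sketch of embedded startup (error handling omitted):
   * <pre>{@code
   * Configuration conf = new HdfsConfiguration();
   * NameNode namenode = new NameNode(conf);
   * namenode.join(); // blocks until the NameNode is stopped
   * }</pre>
   * 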
   * @param conf  configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }

  protected NameNode(Configuration conf, NamenodeRole role)
      throws IOException {
    this.conf = conf;
    this.role = role;
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState();
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
  }

  protected HAState createHAState() {
    return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
  }

  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }

  /**
   * Wait for service to finish.
   * (Normally, it runs forever.)
   */
  public void join() {
    try {
      rpcServer.join();
    } catch (InterruptedException ie) {
      LOG.info("Caught interrupted exception ", ie);
    }
  }

  /**
   * Stop all NameNode threads and wait for all to finish.
   */
  public void stop() {
    synchronized(this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
    }
  }

  synchronized boolean isStopRequested() {
    return stopRequested;
  }

  /**
   * Is the cluster currently in safe mode?
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }

  /** get FSImage */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.dir.fsImage;
  }

  /**
   * @return NameNode RPC address
   */
  public InetSocketAddress getNameNodeAddress() {
    return rpcServer.getRpcAddress();
  }

  /**
   * @return NameNode RPC address in "host:port" string form
   */
  public String getNameNodeAddressHostPortString() {
    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
  }

  /**
   * @return NameNode service RPC address if configured, the
   *    NameNode RPC address otherwise
   */
  public InetSocketAddress getServiceRpcAddress() {
    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
  }

  /**
   * @return NameNode HTTP address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpAddress() {
    return httpServer.getHttpAddress();
  }

  /**
   * @return NameNode HTTPS address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpsAddress() {
    return httpServer.getHttpsAddress();
  }

  /**
   * Verify that configured directories exist, then interactively confirm
   * that formatting is desired for each existing directory, and format them.
   * 
   * @param conf the configuration to use
   * @param force if true, format even if the directories already exist
   * @param isInteractive if true, prompt before formatting existing directories
   * @return true if formatting was aborted, false otherwise
   * @throws IOException
   */
  private static boolean format(Configuration conf, boolean force,
      boolean isInteractive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    checkAllowFormat(conf);

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
    }

    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
    List<URI> dirsToPrompt = new ArrayList<URI>();
    dirsToPrompt.addAll(nameDirsToFormat);
    dirsToPrompt.addAll(sharedDirs);
    List<URI> editDirsToFormat =
                 FSNamesystem.getNamespaceEditsDirs(conf);

    // if no clusterID is provided, generate a new one
    String clusterId = StartupOption.FORMAT.getClusterId();
    if(clusterId == null || clusterId.equals("")) {
      clusterId = NNStorage.newClusterID();
    }
    System.out.println("Formatting using clusterid: " + clusterId);

    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
    try {
      FSNamesystem fsn = new FSNamesystem(conf, fsImage);
      fsImage.getEditLog().initJournalsForWrite();

      if (!fsImage.confirmFormat(force, isInteractive)) {
        return true; // aborted
      }

      fsImage.format(fsn, clusterId);
    } catch (IOException ioe) {
      LOG.warn("Encountered exception during format: ", ioe);
      fsImage.close();
      throw ioe;
    }
    return false;
  }

  public static void checkAllowFormat(Configuration conf) throws IOException {
    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
                + " is set to false for this filesystem, so it "
                + "cannot be formatted. You will need to set "
                + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
                + "to true in order to format this filesystem");
    }
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }

  /**
   * Clone the supplied configuration but remove the shared edits dirs.
   *
   * @param conf Supplies the original configuration.
   * @return Cloned configuration without the shared edit dirs.
   * @throws IOException on failure to generate the configuration.
   */
  private static Configuration getConfigurationWithoutSharedEdits(
      Configuration conf)
      throws IOException {
    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
    String editsDirsString = Joiner.on(",").join(editsDirs);

    Configuration confWithoutShared = new Configuration(conf);
    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
        editsDirsString);
    return confWithoutShared;
  }

  /**
   * Format a new shared edits dir and copy in enough edit log segments so that
   * the standby NN can start up.
   * 
   * @param conf configuration
   * @param force format regardless of whether or not the shared edits dir exists
   * @param interactive prompt the user when a dir exists
   * @return true if the command aborts, false otherwise
   */
  private static boolean initializeSharedEdits(Configuration conf,
      boolean force, boolean interactive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
      LOG.fatal("No shared edits directory configured for namespace " +
          nsId + " namenode " + namenodeId);
      return false;
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
    }

    NNStorage existingStorage = null;
    FSImage sharedEditsImage = null;
    try {
      FSNamesystem fsns =
          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));

      existingStorage = fsns.getFSImage().getStorage();
      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();

      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);

      sharedEditsImage = new FSImage(conf,
          Lists.<URI>newArrayList(),
          sharedEditsDirs);
      sharedEditsImage.getEditLog().initJournalsForWrite();

      if (!sharedEditsImage.confirmFormat(force, interactive)) {
        return true; // abort
      }

      NNStorage newSharedStorage = sharedEditsImage.getStorage();
      // Call Storage.format instead of FSImage.format here, since we don't
      // actually want to save a checkpoint - just prime the dirs with
      // the existing namespace info
      newSharedStorage.format(nsInfo);
      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);

      // Need to make sure the edit log segments are in good shape to initialize
      // the shared edits dir.
      fsns.getFSImage().getEditLog().close();
      fsns.getFSImage().getEditLog().initJournalsForWrite();
      fsns.getFSImage().getEditLog().recoverUnclosedStreams();

      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
          conf);
    } catch (IOException ioe) {
      LOG.error("Could not initialize shared edits dir", ioe);
      return true; // aborted
    } finally {
      if (sharedEditsImage != null) {
        try {
          sharedEditsImage.close();
        }  catch (IOException ioe) {
          LOG.warn("Could not close sharedEditsImage", ioe);
        }
      }
      // Have to unlock storage explicitly for the case when we're running in a
      // unit test, which runs in the same JVM as NNs.
      if (existingStorage != null) {
        try {
          existingStorage.unlockAll();
        } catch (IOException ioe) {
          LOG.warn("Could not unlock storage directories", ioe);
          return true; // aborted
        }
      }
    }
    return false; // did not abort
  }

  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();

    FSEditLog sourceEditLog = fsns.getFSImage().editLog;

    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();

    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }

  private static boolean finalize(Configuration conf,
                               boolean isConfirmationNeeded
                               ) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
    System.err.print(
        "\"finalize\" will remove the previous state of the file system.\n"
        + "Recent upgrade will become permanent.\n"
        + "Rollback option will not be available anymore.\n");
    if (isConfirmationNeeded) {
      if (!confirmPrompt("Finalize filesystem state?")) {
        System.err.println("Finalize aborted.");
        return true;
      }
    }
    nsys.dir.fsImage.finalizeUpgrade();
    return false;
  }

  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }

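  /**
   * Parse command-line startup arguments into a {@link StartupOption}.
   * Returns null when the arguments are invalid, in which case the caller
   * prints the usage message. As a sketch (flag spellings per
   * {@link StartupOption}), an argument list like
   * {@code -format -clusterid cid -nonInteractive} would yield
   * {@link StartupOption#FORMAT} with the cluster ID set and interactive
   * formatting disabled.
   */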
  private static StartupOption parseArguments(String args[]) {
    int argsLen = (args == null) ? 0 : args.length;
    StartupOption startOpt = StartupOption.REGULAR;
    for(int i=0; i < argsLen; i++) {
      String cmd = args[i];
      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FORMAT;
        for (i = i + 1; i < argsLen; i++) {
          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            i++;
            if (i >= argsLen) {
              // if no cluster id specified, return null
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            String clusterId = args[i];
            // Make sure an id is specified and not another flag
            if (clusterId.isEmpty() ||
                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
                clusterId.equalsIgnoreCase(
                    StartupOption.NONINTERACTIVE.getName())) {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            startOpt.setClusterId(clusterId);
          }

          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
            startOpt.setForceFormat(true);
          }

          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
            startOpt.setInteractiveFormat(false);
          }
        }
      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.GENCLUSTERID;
      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.REGULAR;
      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BACKUP;
      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.CHECKPOINT;
      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.UPGRADE;
        // might be followed by two args
        if (i + 2 < argsLen
            && args[i + 1].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
          i += 2;
          startOpt.setClusterId(args[i]);
        }
      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLBACK;
      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FINALIZE;
      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.IMPORT;
      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BOOTSTRAPSTANDBY;
        return startOpt;
      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.INITIALIZESHAREDEDITS;
        for (i = i + 1 ; i < argsLen; i++) {
          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
            startOpt.setInteractiveFormat(false);
          } else if (StartupOption.FORCE.getName().equals(args[i])) {
            startOpt.setForceFormat(true);
          } else {
            LOG.fatal("Invalid argument: " + args[i]);
            return null;
          }
        }
        return startOpt;
      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
        if (startOpt != StartupOption.REGULAR) {
          throw new RuntimeException("Can't combine -recover with " +
              "other startup options.");
        }
        startOpt = StartupOption.RECOVER;
        while (++i < argsLen) {
          if (args[i].equalsIgnoreCase(
                StartupOption.FORCE.getName())) {
            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
          } else {
            throw new RuntimeException("Error parsing recovery options: " +
              "can't understand option \"" + args[i] + "\"");
          }
        }
      } else {
        return null;
      }
    }
    return startOpt;
  }

  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.toString());
  }

  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
                                          StartupOption.REGULAR.toString()));
  }

  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.saveNamespace();
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }

  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) &&
        (startOpt == StartupOption.UPGRADE ||
         startOpt == StartupOption.ROLLBACK ||
         startOpt == StartupOption.FINALIZE)) {
      throw new HadoopIllegalArgumentException("Invalid startup option. " +
          "Cannot perform DFS upgrade with HA enabled.");
    }

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        boolean aborted = finalize(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case BOOTSTRAPSTANDBY: {
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }

  /**
   * In a federated cluster, configuration is set for a group of namenodes
   * and secondary namenode/backup/checkpointer nodes that share a logical
   * nameservice ID. The configuration keys specific to them carry the
   * configured nameserviceId as a suffix.
   * 
   * This method copies the value from the specific key of the form
   * key.nameserviceId to key, to set up the generic configuration. Once this
   * is done, only the generic version of the configuration is read in the
   * rest of the code, for backward compatibility and simpler code changes.
   * 
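   * As a sketch of the effect (with hypothetical IDs {@code ns1} and
   * {@code nn1}):
   * <pre>{@code
   * conf.set("dfs.namenode.rpc-address.ns1.nn1", "nn.example.com:8020");
   * NameNode.initializeGenericKeys(conf, "ns1", "nn1");
   * // the suffixed value has been copied to the generic key:
   * conf.get("dfs.namenode.rpc-address"); // "nn.example.com:8020"
   * }</pre>
   * 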
   * @param conf
   *          Configuration object used to look up the specific keys and to
   *          set the values on the generic keys. Note the conf object is modified.
1278       * @param nameserviceId name service Id (to distinguish federated NNs)
1279       * @param namenodeId the namenode ID (to distinguish HA NNs)
1280       * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1281       */
1282      public static void initializeGenericKeys(Configuration conf,
1283          String nameserviceId, String namenodeId) {
1284        if ((nameserviceId != null && !nameserviceId.isEmpty()) || 
1285            (namenodeId != null && !namenodeId.isEmpty())) {
1286          if (nameserviceId != null) {
1287            conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1288          }
1289          if (namenodeId != null) {
1290            conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1291          }
1292          
1293          DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1294              NAMENODE_SPECIFIC_KEYS);
1295          DFSUtil.setGenericConf(conf, nameserviceId, null,
1296              NAMESERVICE_SPECIFIC_KEYS);
1297        }
1298        
    // If the RPC address is set, use it to (re-)configure the default FS
1300        if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1301          URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1302              + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1303          conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri);
1305        }
1306      }
1307        
1308      /** 
1309       * Get the name service Id for the node
1310       * @return name service Id or null if federation is not configured
1311       */
1312      protected String getNameServiceId(Configuration conf) {
1313        return DFSUtil.getNamenodeNameServiceId(conf);
1314      }
1315      
1316      /**
1317       */
  public static void main(String[] argv) throws Exception {
1319        if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1320          System.exit(0);
1321        }
1322    
1323        try {
1324          StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1325          NameNode namenode = createNameNode(argv, null);
1326          if (namenode != null) {
1327            namenode.join();
1328          }
1329        } catch (Throwable e) {
1330          LOG.fatal("Exception in namenode join", e);
1331          terminate(1, e);
1332        }
1333      }
1334    
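  /**
   * Verify that this NameNode is healthy enough to act as the active node.
   * A no-op when HA is not enabled.
   *
   * @throws HealthCheckFailedException if the NameNode has no resources
   *           available (for example, its local storage is low on space)
   * @throws AccessControlException if the caller is not a superuser
   */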
1335      synchronized void monitorHealth() 
1336          throws HealthCheckFailedException, AccessControlException {
1337        namesystem.checkSuperuserPrivilege();
1338        if (!haEnabled) {
      return; // no-op if HA is not enabled
1340        }
1341        getNamesystem().checkAvailableResources();
1342        if (!getNamesystem().nameNodeHasResourcesAvailable()) {
1343          throw new HealthCheckFailedException(
1344              "The NameNode has no resources available");
1345        }
1346      }
1347      
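  /**
   * Transition this NameNode to the active state.
   *
   * @throws ServiceFailedException if HA is not enabled
   * @throws AccessControlException if the caller is not a superuser
   */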
1348      synchronized void transitionToActive() 
1349          throws ServiceFailedException, AccessControlException {
1350        namesystem.checkSuperuserPrivilege();
1351        if (!haEnabled) {
1352          throw new ServiceFailedException("HA for namenode is not enabled");
1353        }
1354        state.setState(haContext, ACTIVE_STATE);
1355      }
1356      
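  /**
   * Transition this NameNode to the standby state.
   *
   * @throws ServiceFailedException if HA is not enabled
   * @throws AccessControlException if the caller is not a superuser
   */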
1357      synchronized void transitionToStandby() 
1358          throws ServiceFailedException, AccessControlException {
1359        namesystem.checkSuperuserPrivilege();
1360        if (!haEnabled) {
1361          throw new ServiceFailedException("HA for namenode is not enabled");
1362        }
1363        state.setState(haContext, STANDBY_STATE);
1364      }
1365    
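  /**
   * Return the HA status of this NameNode, including whether it is ready to
   * become active. A standby that is still in safemode reports itself as not
   * ready.
   *
   * @throws ServiceFailedException if HA is not enabled
   * @throws AccessControlException if the caller is not a superuser
   */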
1366      synchronized HAServiceStatus getServiceStatus()
1367          throws ServiceFailedException, AccessControlException {
1368        namesystem.checkSuperuserPrivilege();
1369        if (!haEnabled) {
1370          throw new ServiceFailedException("HA for namenode is not enabled");
1371        }
1372        if (state == null) {
1373          return new HAServiceStatus(HAServiceState.INITIALIZING);
1374        }
1375        HAServiceState retState = state.getServiceState();
1376        HAServiceStatus ret = new HAServiceStatus(retState);
1377        if (retState == HAServiceState.STANDBY) {
1378          String safemodeTip = namesystem.getSafeModeTip();
1379          if (!safemodeTip.isEmpty()) {
1380            ret.setNotReadyToBecomeActive(
1381                "The NameNode is in safemode. " +
1382                safemodeTip);
1383          } else {
1384            ret.setReadyToBecomeActive();
1385          }
1386        } else if (retState == HAServiceState.ACTIVE) {
1387          ret.setReadyToBecomeActive();
1388        } else {
1389          ret.setNotReadyToBecomeActive("State is " + state);
1390        }
1391        return ret;
1392      }
1393    
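  /**
   * @return the current HA service state, or INITIALIZING if no state has
   *         been set yet
   */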
1394      synchronized HAServiceState getServiceState() {
1395        if (state == null) {
1396          return HAServiceState.INITIALIZING;
1397        }
1398        return state.getServiceState();
1399      }
1400    
1401      /**
1402       * Register NameNodeStatusMXBean
1403       */
1404      private void registerNNSMXBean() {
1405        nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
1406      }
1407    
1408      @Override // NameNodeStatusMXBean
1409      public String getNNRole() {
1410        String roleStr = "";
1411        NamenodeRole role = getRole();
1412        if (null != role) {
1413          roleStr = role.toString();
1414        }
1415        return roleStr;
1416      }
1417    
1418      @Override // NameNodeStatusMXBean
1419      public String getState() {
1420        String servStateStr = "";
1421        HAServiceState servState = getServiceState();
1422        if (null != servState) {
1423          servStateStr = servState.toString();
1424        }
1425        return servStateStr;
1426      }
1427    
1428      @Override // NameNodeStatusMXBean
1429      public String getHostAndPort() {
1430        return getNameNodeAddressHostPortString();
1431      }
1432    
1433      @Override // NameNodeStatusMXBean
1434      public boolean isSecurityEnabled() {
1435        return UserGroupInformation.isSecurityEnabled();
1436      }
1437    
1438      /**
1439       * Shutdown the NN immediately in an ungraceful way. Used when it would be
1440       * unsafe for the NN to continue operating, e.g. during a failed HA state
1441       * transition.
1442       * 
1443       * @param t exception which warrants the shutdown. Printed to the NN log
1444       *          before exit.
   * @throws ExitException thrown only under test, when ExitUtil has been
   *           configured to throw instead of calling System.exit.
1446       */
1447      protected synchronized void doImmediateShutdown(Throwable t)
1448          throws ExitException {
1449        String message = "Error encountered requiring NN shutdown. " +
1450            "Shutting down immediately.";
1451        try {
1452          LOG.fatal(message, t);
1453        } catch (Throwable ignored) {
1454          // This is unlikely to happen, but there's nothing we can do if it does.
1455        }
1456        terminate(1, t);
1457      }
1458      
1459      /**
1460       * Class used to expose {@link NameNode} as context to {@link HAState}
1461       */
1462      protected class NameNodeHAContext implements HAContext {
1463        @Override
1464        public void setState(HAState s) {
1465          state = s;
1466        }
1467    
1468        @Override
1469        public HAState getState() {
1470          return state;
1471        }
1472    
1473        @Override
1474        public void startActiveServices() throws IOException {
1475          try {
1476            namesystem.startActiveServices();
1477            startTrashEmptier(conf);
1478          } catch (Throwable t) {
1479            doImmediateShutdown(t);
1480          }
1481        }
1482    
1483        @Override
1484        public void stopActiveServices() throws IOException {
1485          try {
1486            if (namesystem != null) {
1487              namesystem.stopActiveServices();
1488            }
1489            stopTrashEmptier();
1490          } catch (Throwable t) {
1491            doImmediateShutdown(t);
1492          }
1493        }
1494    
1495        @Override
1496        public void startStandbyServices() throws IOException {
1497          try {
1498            namesystem.startStandbyServices(conf);
1499          } catch (Throwable t) {
1500            doImmediateShutdown(t);
1501          }
1502        }
1503    
1504        @Override
1505        public void prepareToStopStandbyServices() throws ServiceFailedException {
1506          try {
1507            namesystem.prepareToStopStandbyServices();
1508          } catch (Throwable t) {
1509            doImmediateShutdown(t);
1510          }
1511        }
1512        
1513        @Override
1514        public void stopStandbyServices() throws IOException {
1515          try {
1516            if (namesystem != null) {
1517              namesystem.stopStandbyServices();
1518            }
1519          } catch (Throwable t) {
1520            doImmediateShutdown(t);
1521          }
1522        }
1523        
1524        @Override
1525        public void writeLock() {
1526          namesystem.writeLock();
1527        }
1528        
1529        @Override
1530        public void writeUnlock() {
1531          namesystem.writeUnlock();
1532        }
1533        
    /** Check if an operation of the given category is allowed. */
1535        @Override
1536        public void checkOperation(final OperationCategory op)
1537            throws StandbyException {
1538          state.checkOperation(haContext, op);
1539        }
1540        
1541        @Override
1542        public boolean allowStaleReads() {
1543          return allowStaleStandbyReads;
1544        }
1545    
1546      }
1547      
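  /** @return true if this NameNode is currently in the standby state */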
1548      public boolean isStandbyState() {
    return state.equals(STANDBY_STATE);
1550      }
1551    
1552      /**
1553       * Check that a request to change this node's HA state is valid.
   * In particular, verifies that, if automatic failover is enabled, non-forced
   * requests from the HAAdmin CLI are rejected, and that, if automatic
   * failover is disabled, requests from the ZK failover controller are
   * rejected.
1556       *
1557       * @param req the request to check
1558       * @throws AccessControlException if the request is disallowed
1559       */
1560      void checkHaStateChange(StateChangeRequestInfo req)
1561          throws AccessControlException {
1562        boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1563            DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1564        switch (req.getSource()) {
1565        case REQUEST_BY_USER:
1566          if (autoHaEnabled) {
1567            throw new AccessControlException(
1568                "Manual HA control for this NameNode is disallowed, because " +
1569                "automatic HA is enabled.");
1570          }
1571          break;
1572        case REQUEST_BY_USER_FORCED:
1573          if (autoHaEnabled) {
1574            LOG.warn("Allowing manual HA control from " +
1575                Server.getRemoteAddress() +
1576                " even though automatic HA is enabled, because the user " +
1577                "specified the force flag");
1578          }
1579          break;
1580        case REQUEST_BY_ZKFC:
1581          if (!autoHaEnabled) {
1582            throw new AccessControlException(
1583                "Request from ZK failover controller at " +
1584                Server.getRemoteAddress() + " denied since automatic HA " +
1585                "is not enabled"); 
1586          }
1587          break;
1588        }
1589      }
1590    }