/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HAServiceStatus;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.ha.*;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
import org.apache.hadoop.hdfs.server.protocol.*;
import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.RefreshUserMappingsProtocol;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils;

import javax.management.ObjectName;

import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
import static org.apache.hadoop.util.ExitUtil.terminate;
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;

/**********************************************************
 * NameNode serves as both directory namespace manager and
 * "inode table" for the Hadoop DFS.  There is a single NameNode
 * running in any DFS deployment.  (Well, except when there
 * is a second backup/failover NameNode, or when using federated NameNodes.)
 *
 * The NameNode controls two critical tables:
 *   1)  filename->blocksequence (namespace)
 *   2)  block->machinelist ("inodes")
 *
 * The first table is stored on disk and is very precious.
 * The second table is rebuilt every time the NameNode comes up.
 *
 * 'NameNode' refers to both this class and the 'NameNode server'.
 * The 'FSNamesystem' class actually performs most of the filesystem
 * management.  The majority of the 'NameNode' class itself is concerned
 * with exposing the IPC interface and the HTTP server to the outside world,
 * plus some configuration management.
 *
 * NameNode implements the
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
 * allows clients to ask for DFS services.
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
 * direct use by authors of DFS client code.  End-users should instead use the
 * {@link org.apache.hadoop.fs.FileSystem} class.
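 *
 * A minimal sketch of typical client usage (illustrative only; the path is
 * hypothetical):
 * <pre>{@code
 *   Configuration conf = new HdfsConfiguration();
 *   // FileSystem.get() resolves fs.defaultFS to this NameNode and speaks
 *   // ClientProtocol to it under the covers.
 *   FileSystem fs = FileSystem.get(conf);
 *   fs.mkdirs(new Path("/tmp/example"));
 * }</pre>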
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
 * used by DataNodes that actually store DFS data blocks.  These
 * methods are invoked repeatedly and automatically by all the
 * DataNodes in a DFS deployment.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
 * used by secondary namenodes or rebalancing processes to get partial
 * NameNode state, for example partial blocksMap etc.
 **********************************************************/
@InterfaceAudience.Private
public class NameNode implements NameNodeStatusMXBean {
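  // Ensure HDFS configuration resources (hdfs-default.xml, hdfs-site.xml) and
  // deprecated-key mappings are registered before any Configuration is read.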
  static {
    HdfsConfiguration.init();
  }

  /**
   * Categories of operations supported by the namenode.
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }

  /**
   * HDFS configuration can have three types of parameters:
   * <ol>
   * <li>Parameters that are common for all the name services in the cluster.</li>
   * <li>Parameters that are specific to a name service. These keys are suffixed
   * with nameserviceId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1".</li>
   * <li>Parameters that are specific to a single name node. These keys are suffixed
   * with nameserviceId and namenodeId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1.namenode1".</li>
   * </ol>
   *
   * In the latter two cases, operators may specify the configuration without
   * any suffix, with a nameservice suffix, or with a nameservice and namenode
   * suffix. The more specific suffix takes precedence.
   *
   * The keys below are specific to a given namenode, and thus may be configured
   * globally, for a nameservice, or for a specific namenode within a nameservice.
   */
165  public static final String[] NAMENODE_SPECIFIC_KEYS = {
166    DFS_NAMENODE_RPC_ADDRESS_KEY,
167    DFS_NAMENODE_RPC_BIND_HOST_KEY,
168    DFS_NAMENODE_NAME_DIR_KEY,
169    DFS_NAMENODE_EDITS_DIR_KEY,
170    DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
171    DFS_NAMENODE_CHECKPOINT_DIR_KEY,
172    DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
173    DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
174    DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
175    DFS_NAMENODE_HTTP_ADDRESS_KEY,
176    DFS_NAMENODE_HTTPS_ADDRESS_KEY,
177    DFS_NAMENODE_HTTP_BIND_HOST_KEY,
178    DFS_NAMENODE_HTTPS_BIND_HOST_KEY,
179    DFS_NAMENODE_KEYTAB_FILE_KEY,
180    DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
181    DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
182    DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
183    DFS_NAMENODE_BACKUP_ADDRESS_KEY,
184    DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
185    DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
186    DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY,
187    DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY,
188    DFS_HA_FENCE_METHODS_KEY,
    DFS_HA_ZKFC_PORT_KEY
  };

  /**
   * These keys are specific to a nameservice, but may not be overridden
   * for a specific namenode.
   * @see #NAMENODE_SPECIFIC_KEYS
   */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };

  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | \n\t["
      + StartupOption.CHECKPOINT.getName() + "] | \n\t["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t["
      + StartupOption.UPGRADE.getName() +
        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
        " [" + StartupOption.RENAMERESERVED.getName() + " <k-v pairs>] ] | \n\t["
      + StartupOption.ROLLBACK.getName() + "] | \n\t["
      + StartupOption.ROLLINGUPGRADE.getName() + " <"
      + RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|"
      + RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | \n\t["
      + StartupOption.FINALIZE.getName() + "] | \n\t["
      + StartupOption.IMPORT.getName() + "] | \n\t["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t["
      + StartupOption.RECOVER.getName() + " [ "
      + StartupOption.FORCE.getName() + "] ] | \n\t["
      + StartupOption.METADATAVERSION.getName() + " ] "
      + " ]";

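  /**
   * Returns the version of the named protocol, used during RPC connection
   * setup to check that client and server agree on the protocol version.
   */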
  public long getProtocolVersion(String protocol,
                                 long clientVersion) throws IOException {
    if (protocol.equals(ClientProtocol.class.getName())) {
      return ClientProtocol.versionID;
    } else if (protocol.equals(DatanodeProtocol.class.getName())) {
      return DatanodeProtocol.versionID;
    } else if (protocol.equals(NamenodeProtocol.class.getName())) {
      return NamenodeProtocol.versionID;
    } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())) {
      return RefreshAuthorizationPolicyProtocol.versionID;
    } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())) {
      return RefreshUserMappingsProtocol.versionID;
    } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
      return RefreshCallQueueProtocol.versionID;
    } else if (protocol.equals(GetUserMappingsProtocol.class.getName())) {
      return GetUserMappingsProtocol.versionID;
    } else {
      throw new IOException("Unknown protocol to name node: " + protocol);
    }
  }

  public static final int DEFAULT_PORT = 8020;
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
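  // The HA state objects hold no per-NameNode state, so they can be shared;
  // per-node HA bookkeeping lives in the HAContext.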
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();

  protected FSNamesystem namesystem;
  protected final Configuration conf;
  protected final NamenodeRole role;
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected final boolean allowStaleStandbyReads;

  /** HTTP server exposing the NameNode web UI and servlets. */
  protected NameNodeHttpServer httpServer;
  private Thread emptier;
  /** only used for testing purposes  */
  protected boolean stopRequested = false;
  /** Registration information of this name-node  */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;

  private NameNodeRpcServer rpcServer;

  private JvmPauseMonitor pauseMonitor;
  private ObjectName nameNodeStatusBeanName;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using a logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;

  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location. */
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }

  static NameNodeMetrics metrics;
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }

  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   *
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  /**
   * Return the service name of the issued delegation token.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() {
    return getClientNamenodeAddress();
  }

  /**
   * Set the namenode address that will be used by clients to access this
   * namenode or name service. This needs to be called before the config
   * is overridden.
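   * <p>
   * For example (illustrative values): with {@code fs.defaultFS} set to
   * {@code hdfs://mycluster}, where "mycluster" is a configured nameservice
   * ID, the client address is the logical name "mycluster"; with
   * {@code hdfs://nn1.example.com:8020} it is the authority
   * "nn1.example.com:8020".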
   */
  public void setClientNamenodeAddress(Configuration conf) {
    String nnAddr = conf.get(FS_DEFAULT_NAME_KEY);
    if (nnAddr == null) {
      // default fs is not set.
      clientNamenodeAddress = null;
      return;
    }

    LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr);
    URI nnUri = URI.create(nnAddr);

    String nnHost = nnUri.getHost();
    if (nnHost == null) {
      clientNamenodeAddress = null;
      return;
    }

    if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) {
      // host name is logical
      clientNamenodeAddress = nnHost;
    } else if (nnUri.getPort() > 0) {
      // physical address with a valid port
      clientNamenodeAddress = nnUri.getAuthority();
    } else {
      // the port is missing or 0. Figure out real bind address later.
      clientNamenodeAddress = null;
      return;
    }
    LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
        + " this namenode/service.");
  }

  /**
   * Get the namenode address to be used by clients.
   * @return nn address
   */
  public String getClientNamenodeAddress() {
    return clientNamenodeAddress;
  }

  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }

  /**
   * Set the configuration property for the service RPC address to the
   * given address.
   */
  public static void setServiceAddress(Configuration conf,
                                           String address) {
    LOG.info("Setting ADDRESS " + address);
    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
  }

  /**
   * Fetches the address that services (DataNodes, the backup node, and other
   * non-client callers) should use to connect to the namenode. If the
   * service RPC address is not configured, this returns the default
   * namenode address (used by both clients and services) when fallback is
   * true, and null otherwise.
   */
  public static InetSocketAddress getServiceAddress(Configuration conf,
                                                        boolean fallback) {
    String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
    if (addr == null || addr.isEmpty()) {
      return fallback ? getAddress(conf) : null;
    }
    return getAddress(addr);
  }

  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }

  /**
   * @return address of file system
   */
  public static InetSocketAddress getAddress(URI filesystemURI) {
    String authority = filesystemURI.getAuthority();
    if (authority == null) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s has no authority.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
    }
    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
        filesystemURI.getScheme())) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
          HdfsConstants.HDFS_URI_SCHEME));
    }
    return getAddress(authority);
  }

  public static URI getUri(InetSocketAddress namenode) {
    int port = namenode.getPort();
    String portString = port == DEFAULT_PORT ? "" : (":" + port);
    return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
        + namenode.getHostName() + portString);
  }

  //
  // Common NameNode methods implementation for the active name-node role.
  //
  public NamenodeRole getRole() {
    return role;
  }

  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }

  /**
   * Given a configuration, get the address of the service RPC server.
   * Returns null if the service RPC address is not configured.
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    return NameNode.getServiceAddress(conf, false);
  }

  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }

  /** Given a configuration, get the bind host of the service RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getServiceRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /** Given a configuration, get the bind host of the client RPC server.
   *  Returns null if the bind host is not configured.
   */
  protected String getRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /**
   * Modifies the passed configuration to contain the service RPC address
   * setting.
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }

  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }

  /**
   * HTTP server address for binding the endpoint. This method is
   * for use by the NameNode and its derivatives. It may return
   * a different address than the one that clients should use to
   * connect to the NameNode. See
   * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY}.
   *
   * @param conf the configuration to read
   * @return the bind address of the HTTP server
   */
  protected InetSocketAddress getHttpServerBindAddress(Configuration conf) {
    InetSocketAddress bindAddress = getHttpServerAddress(conf);

    // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the
    // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY.
    final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY);
    if (bindHost != null && !bindHost.isEmpty()) {
      bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort());
    }

    return bindAddress;
  }

  /** @return the NameNode HTTP address. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }

  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }

  /* Optimize ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser, which is synchronized.
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }

  /**
   * Login as the configured user for the NameNode.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
  }

  /**
   * Initialize name-node.
   *
   * @param conf the configuration
   */
  protected void initialize(Configuration conf) throws IOException {
    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
      if (intervals != null) {
        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
          intervals);
      }
    }

    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());
    StartupProgressMetrics.register(startupProgress);

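    // Start the HTTP server before loading the namesystem so that startup
    // progress (image and edit log loading) is visible in the web UI while
    // the NameNode comes up.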
    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
    }
    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    if (clientNamenodeAddress == null) {
      // This is expected for MiniDFSCluster. Set it now using
      // the RPC server's bind address.
      clientNamenodeAddress =
          NetUtils.getHostPortString(rpcServer.getRpcAddress());
      LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
          + " this namenode/service.");
    }
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }

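    // Watch for long JVM pauses (e.g. GC or host stalls) and surface them in
    // the NameNode logs and metrics.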
    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();
    metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);

    startCommonServices(conf);
  }

  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }

  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p : plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }

  private void stopCommonServices() {
    if (rpcServer != null) rpcServer.stop();
    if (namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }
    stopHttpServer();
  }

  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }

    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }

  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }

  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }

  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }

  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul>
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
   * upgrade and create a snapshot of the current file system state</li>
   * <li>{@link StartupOption#RECOVER RECOVER} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
   *            cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
   *            previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
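   *
   * For example (illustrative), a new namespace is typically formatted before
   * the first regular start:
   * <pre>{@code
   *   hdfs namenode -format [-clusterid cid] [-force] [-nonInteractive]
   * }</pre>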
   * The option is passed via configuration field:
   * <tt>dfs.namenode.startup</tt>
   *
   * The conf will be modified to reflect the actual ports on which
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
   *
   * @param conf  configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }

  protected NameNode(Configuration conf, NamenodeRole role)
      throws IOException {
    this.conf = conf;
    this.role = role;
    setClientNamenodeAddress(conf);
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState(getStartupOption(conf));
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
  }

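  /**
   * Determine the initial HA state. A non-HA NameNode always starts active,
   * as does a NameNode started with the UPGRADE option, since an upgrade
   * cannot be carried out from the standby state.
   */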
  protected HAState createHAState(StartupOption startOpt) {
    if (!haEnabled || startOpt == StartupOption.UPGRADE) {
      return ACTIVE_STATE;
    } else {
      return STANDBY_STATE;
    }
  }

  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }

  /**
   * Wait for service to finish.
   * (Normally, it runs forever.)
   */
  public void join() {
    try {
      rpcServer.join();
    } catch (InterruptedException ie) {
      LOG.info("Caught interrupted exception ", ie);
    }
  }

  /**
   * Stop all NameNode threads and wait for all to finish.
   */
  public void stop() {
    synchronized (this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
    }
  }

  synchronized boolean isStopRequested() {
    return stopRequested;
  }

  /**
   * Is the cluster currently in safe mode?
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }

  /** get FSImage */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.getFSImage();
  }

  /**
   * @return NameNode RPC address
   */
  public InetSocketAddress getNameNodeAddress() {
    return rpcServer.getRpcAddress();
  }

  /**
   * @return NameNode RPC address in "host:port" string form
   */
  public String getNameNodeAddressHostPortString() {
    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
  }

  /**
   * @return NameNode service RPC address if configured, the
   *    NameNode RPC address otherwise
   */
  public InetSocketAddress getServiceRpcAddress() {
    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
  }

  /**
   * @return NameNode HTTP address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpAddress() {
    return httpServer.getHttpAddress();
  }

  /**
   * @return NameNode HTTPS address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpsAddress() {
    return httpServer.getHttpsAddress();
  }

  /**
   * Verify that configured directories exist, then interactively confirm
   * that formatting is desired for each existing directory, and format them.
   *
   * @param conf configuration to use
   * @param force if true, format regardless of whether dirs exist
   * @param isInteractive if true, prompt the user before formatting
   *          existing directories
   * @return true if formatting was aborted, false otherwise
   * @throws IOException
   */
  private static boolean format(Configuration conf, boolean force,
      boolean isInteractive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    checkAllowFormat(conf);

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
    }

    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
    List<URI> dirsToPrompt = new ArrayList<URI>();
    dirsToPrompt.addAll(nameDirsToFormat);
    dirsToPrompt.addAll(sharedDirs);
    List<URI> editDirsToFormat =
                 FSNamesystem.getNamespaceEditsDirs(conf);

    // If clusterID is not provided, generate a new one.
    String clusterId = StartupOption.FORMAT.getClusterId();
    if (clusterId == null || clusterId.equals("")) {
      clusterId = NNStorage.newClusterID();
    }
    System.out.println("Formatting using clusterid: " + clusterId);

    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
    try {
      FSNamesystem fsn = new FSNamesystem(conf, fsImage);
      fsImage.getEditLog().initJournalsForWrite();

      if (!fsImage.confirmFormat(force, isInteractive)) {
        return true; // aborted
      }

      fsImage.format(fsn, clusterId);
    } catch (IOException ioe) {
      LOG.warn("Encountered exception during format: ", ioe);
      fsImage.close();
      throw ioe;
    }
    return false;
  }

  public static void checkAllowFormat(Configuration conf) throws IOException {
    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
                + " is set to false for this filesystem, so it "
                + "cannot be formatted. You will need to set the "
                + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY + " parameter "
                + "to true in order to format this filesystem.");
    }
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }

  /**
   * Clone the supplied configuration but remove the shared edits dirs.
   *
   * @param conf Supplies the original configuration.
   * @return Cloned configuration without the shared edit dirs.
   * @throws IOException on failure to generate the configuration.
   */
  private static Configuration getConfigurationWithoutSharedEdits(
      Configuration conf)
      throws IOException {
    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
    String editsDirsString = Joiner.on(",").join(editsDirs);

    Configuration confWithoutShared = new Configuration(conf);
    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
        editsDirsString);
    return confWithoutShared;
  }

  /**
   * Format a new shared edits dir and copy in enough edit log segments so that
   * the standby NN can start up.
   *
   * @param conf configuration
   * @param force format regardless of whether or not the shared edits dir exists
   * @param interactive prompt the user when a dir exists
   * @return true if the command aborts, false otherwise
   */
  private static boolean initializeSharedEdits(Configuration conf,
      boolean force, boolean interactive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
      LOG.fatal("No shared edits directory configured for namespace " +
          nsId + " namenode " + namenodeId);
      return false;
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
    }

    NNStorage existingStorage = null;
    FSImage sharedEditsImage = null;
    try {
      FSNamesystem fsns =
          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));

      existingStorage = fsns.getFSImage().getStorage();
      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();

      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);

      sharedEditsImage = new FSImage(conf,
          Lists.<URI>newArrayList(),
          sharedEditsDirs);
      sharedEditsImage.getEditLog().initJournalsForWrite();

      if (!sharedEditsImage.confirmFormat(force, interactive)) {
        return true; // abort
      }

      NNStorage newSharedStorage = sharedEditsImage.getStorage();
      // Call Storage.format instead of FSImage.format here, since we don't
      // actually want to save a checkpoint - just prime the dirs with
      // the existing namespace info
      newSharedStorage.format(nsInfo);
      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);

      // Need to make sure the edit log segments are in good shape to initialize
      // the shared edits dir.
      fsns.getFSImage().getEditLog().close();
      fsns.getFSImage().getEditLog().initJournalsForWrite();
      fsns.getFSImage().getEditLog().recoverUnclosedStreams();

      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
          conf);
    } catch (IOException ioe) {
      LOG.error("Could not initialize shared edits dir", ioe);
      return true; // aborted
    } finally {
      if (sharedEditsImage != null) {
        try {
          sharedEditsImage.close();
        } catch (IOException ioe) {
          LOG.warn("Could not close sharedEditsImage", ioe);
        }
      }
      // Have to unlock storage explicitly for the case when we're running in a
      // unit test, which runs in the same JVM as NNs.
      if (existingStorage != null) {
        try {
          existingStorage.unlockAll();
        } catch (IOException ioe) {
          LOG.warn("Could not unlock storage directories", ioe);
          return true; // aborted
        }
      }
    }
    return false; // did not abort
  }

  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();

    FSEditLog sourceEditLog = fsns.getFSImage().editLog;

    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();

    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }

  @VisibleForTesting
  public static boolean doRollback(Configuration conf,
      boolean isConfirmationNeeded) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
    System.err.print(
        "\"rollBack\" will remove the current state of the file system,\n"
        + "returning you to the state prior to initiating your recent\n"
        + "upgrade. This action is permanent and cannot be undone. If you\n"
        + "are performing a rollback in an HA environment, you should be\n"
        + "certain that no NameNode process is running on any host.");
    if (isConfirmationNeeded) {
      if (!confirmPrompt("Roll back file system state?")) {
        System.err.println("Rollback aborted.");
        return true;
      }
    }
    nsys.getFSImage().doRollback(nsys);
    return false;
  }

  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }

  @VisibleForTesting
  static StartupOption parseArguments(String args[]) {
    int argsLen = (args == null) ? 0 : args.length;
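    // Default to a regular start when no startup option is given.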
    StartupOption startOpt = StartupOption.REGULAR;
    for (int i = 0; i < argsLen; i++) {
      String cmd = args[i];
      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FORMAT;
        for (i = i + 1; i < argsLen; i++) {
          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            i++;
            if (i >= argsLen) {
              // if no cluster id specified, return null
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            String clusterId = args[i];
            // Make sure an id is specified and not another flag
            if (clusterId.isEmpty() ||
                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
                clusterId.equalsIgnoreCase(
                    StartupOption.NONINTERACTIVE.getName())) {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            startOpt.setClusterId(clusterId);
          }

          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
            startOpt.setForceFormat(true);
          }

          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
            startOpt.setInteractiveFormat(false);
          }
        }
      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.GENCLUSTERID;
      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.REGULAR;
      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BACKUP;
      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.CHECKPOINT;
      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.UPGRADE;
        /* Can be followed by CLUSTERID with a required parameter or
         * RENAMERESERVED with an optional parameter
         */
        while (i + 1 < argsLen) {
          String flag = args[i + 1];
          if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            if (i + 2 < argsLen) {
              i += 2;
              startOpt.setClusterId(args[i]);
            } else {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
          } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
              .getName())) {
            if (i + 2 < argsLen) {
              FSImageFormat.setRenameReservedPairs(args[i + 2]);
              i += 2;
            } else {
              FSImageFormat.useDefaultRenameReservedPairs();
              i += 1;
            }
          } else {
            LOG.fatal("Unknown upgrade flag " + flag);
            return null;
          }
        }
      } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLINGUPGRADE;
        ++i;
        if (i >= argsLen) {
          // Guard against running off the end of argv when the rolling
          // upgrade startup option is missing.
          LOG.fatal("Must specify a rolling upgrade startup option after the "
              + StartupOption.ROLLINGUPGRADE.getName() + " flag");
          return null;
        }
        startOpt.setRollingUpgradeStartupOption(args[i]);
      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLBACK;
      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FINALIZE;
      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.IMPORT;
      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BOOTSTRAPSTANDBY;
        return startOpt;
      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.INITIALIZESHAREDEDITS;
        for (i = i + 1; i < argsLen; i++) {
          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
            startOpt.setInteractiveFormat(false);
          } else if (StartupOption.FORCE.getName().equals(args[i])) {
            startOpt.setForceFormat(true);
          } else {
            LOG.fatal("Invalid argument: " + args[i]);
            return null;
          }
        }
        return startOpt;
      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
        if (startOpt != StartupOption.REGULAR) {
          throw new RuntimeException("Can't combine -recover with " +
              "other startup options.");
        }
        startOpt = StartupOption.RECOVER;
        while (++i < argsLen) {
          if (args[i].equalsIgnoreCase(
                StartupOption.FORCE.getName())) {
            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
          } else {
            throw new RuntimeException("Error parsing recovery options: " +
              "can't understand option \"" + args[i] + "\"");
          }
        }
      } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.METADATAVERSION;
      } else {
        return null;
      }
    }
    return startOpt;
  }

  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }

  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
                                          StartupOption.REGULAR.toString()));
  }

  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }

  /**
   * Verify that configured directories exist, then print the metadata versions
   * of the software and the image.
   *
   * @param conf configuration to use
   * @throws IOException
   */
  private static boolean printMetadataVersion(Configuration conf)
    throws IOException {
    final FSImage fsImage = new FSImage(conf);
    final FSNamesystem fs = new FSNamesystem(conf, fsImage, false);
    return fsImage.recoverTransitionRead(
      StartupOption.METADATAVERSION, fs, null);
  }

  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            "and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      case METADATAVERSION: {
        printMetadataVersion(conf);
        terminate(0);
        return null; // avoid javac warning
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }

  /**
   * In a federated setup, configuration is set for a collection of namenodes
   * and secondary/backup/checkpointer nodes, grouped under a logical
   * nameservice ID. The configuration keys specific to them have the
   * configured nameserviceId as a suffix.
   *
   * This method copies the value from the specific key of the form
   * key.nameserviceId to the generic key, to set up the generic
   * configuration. Once this is done, only the generic version of the
   * configuration is read in the rest of the code, for backward
   * compatibility and simpler code changes.
1422   * 
1423   * @param conf
1424   *          Configuration object to lookup specific key and to set the value
1425   *          to the key passed. Note the conf object is modified
1426   * @param nameserviceId name service Id (to distinguish federated NNs)
1427   * @param namenodeId the namenode ID (to distinguish HA NNs)
1428   * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1429   */
1430  public static void initializeGenericKeys(Configuration conf,
1431      String nameserviceId, String namenodeId) {
1432    if ((nameserviceId != null && !nameserviceId.isEmpty()) || 
1433        (namenodeId != null && !namenodeId.isEmpty())) {
1434      if (nameserviceId != null) {
1435        conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1436      }
1437      if (namenodeId != null) {
1438        conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1439      }
1440      
1441      DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1442          NAMENODE_SPECIFIC_KEYS);
1443      DFSUtil.setGenericConf(conf, nameserviceId, null,
1444          NAMESERVICE_SPECIFIC_KEYS);
1445    }
1446    
1447    // If the RPC address is set use it to (re-)configure the default FS
1448    if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1449      URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1450          + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1451      conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1452      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1453    }
1454  }
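
  /*
   * Illustrative sketch of the key resolution performed above; the
   * nameservice ID "ns1", namenode ID "nn1", and host "host1" are
   * hypothetical example values, not defaults:
   *
   *   Configuration conf = new HdfsConfiguration();
   *   conf.set("dfs.namenode.rpc-address.ns1.nn1", "host1:8020");
   *   NameNode.initializeGenericKeys(conf, "ns1", "nn1");
   *   conf.get("dfs.namenode.rpc-address"); // now "host1:8020"
   *   conf.get("fs.defaultFS");             // now "hdfs://host1:8020"
   */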

  /**
   * Get the name service Id for the node.
   * @param conf configuration to look the nameservice ID up in
   * @return name service Id or null if federation is not configured
   */
  protected String getNameServiceId(Configuration conf) {
    return DFSUtil.getNamenodeNameServiceId(conf);
  }

  /**
   * Command-line entry point: handles the help argument, logs the startup
   * message, creates a NameNode (or runs a one-shot action such as format),
   * and waits for the NameNode to stop.
   */
  public static void main(String[] argv) throws Exception {
    if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
      System.exit(0);
    }

    try {
      StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
      NameNode namenode = createNameNode(argv, null);
      if (namenode != null) {
        namenode.join();
      }
    } catch (Throwable e) {
      LOG.fatal("Exception in namenode join", e);
      terminate(1, e);
    }
  }

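  /**
   * Health check used during HA failover: verifies that the caller is a
   * superuser and, when HA is enabled, that the name directories have
   * sufficient resources available.
   *
   * @throws HealthCheckFailedException if the NameNode is low on resources
   * @throws AccessControlException if the caller is not a superuser
   */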
  synchronized void monitorHealth()
      throws HealthCheckFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      return; // no-op if HA is not enabled
    }
    getNamesystem().checkAvailableResources();
    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
      throw new HealthCheckFailedException(
          "The NameNode has no resources available");
    }
  }

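  /**
   * Transition this NameNode to the active state. Only allowed for a
   * superuser, and only when HA is enabled.
   */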
  synchronized void transitionToActive()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }

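  /**
   * Transition this NameNode to the standby state. Only allowed for a
   * superuser, and only when HA is enabled.
   */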
  synchronized void transitionToStandby()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }

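  /**
   * Report this NameNode's HA status: the current state, and whether the
   * node is ready to become active (a standby that is still in safemode is
   * reported as not ready).
   */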
  synchronized HAServiceStatus getServiceStatus()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    if (state == null) {
      return new HAServiceStatus(HAServiceState.INITIALIZING);
    }
    HAServiceState retState = state.getServiceState();
    HAServiceStatus ret = new HAServiceStatus(retState);
    if (retState == HAServiceState.STANDBY) {
      String safemodeTip = namesystem.getSafeModeTip();
      if (!safemodeTip.isEmpty()) {
        ret.setNotReadyToBecomeActive(
            "The NameNode is in safemode. " +
            safemodeTip);
      } else {
        ret.setReadyToBecomeActive();
      }
    } else if (retState == HAServiceState.ACTIVE) {
      ret.setReadyToBecomeActive();
    } else {
      ret.setNotReadyToBecomeActive("State is " + state);
    }
    return ret;
  }

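  /**
   * @return the current HA service state, or INITIALIZING if no state has
   *         been set yet
   */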
  synchronized HAServiceState getServiceState() {
    if (state == null) {
      return HAServiceState.INITIALIZING;
    }
    return state.getServiceState();
  }

  /**
   * Register the NameNodeStatusMXBean.
   */
  private void registerNNSMXBean() {
    nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus",
        this);
  }

  @Override // NameNodeStatusMXBean
  public String getNNRole() {
    String roleStr = "";
    NamenodeRole role = getRole();
    if (role != null) {
      roleStr = role.toString();
    }
    return roleStr;
  }

  @Override // NameNodeStatusMXBean
  public String getState() {
    String servStateStr = "";
    HAServiceState servState = getServiceState();
    if (servState != null) {
      servStateStr = servState.toString();
    }
    return servStateStr;
  }

  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }

  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }

  /**
   * Shut down the NN immediately in an ungraceful way. Used when it would be
   * unsafe for the NN to continue operating, e.g. during a failed HA state
   * transition.
   *
   * @param t exception which warrants the shutdown. Printed to the NN log
   *          before exit.
   * @throws ExitException thrown only for testing.
   */
  protected synchronized void doImmediateShutdown(Throwable t)
      throws ExitException {
    String message = "Error encountered requiring NN shutdown. " +
        "Shutting down immediately.";
    try {
      LOG.fatal(message, t);
    } catch (Throwable ignored) {
      // This is unlikely to happen, but there's nothing we can do if it does.
    }
    terminate(1, t);
  }

  /**
   * Class used to expose {@link NameNode} as context to {@link HAState}.
   */
  protected class NameNodeHAContext implements HAContext {
    @Override
    public void setState(HAState s) {
      state = s;
    }

    @Override
    public HAState getState() {
      return state;
    }

    @Override
    public void startActiveServices() throws IOException {
      try {
        namesystem.startActiveServices();
        startTrashEmptier(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopActiveServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopActiveServices();
        }
        stopTrashEmptier();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void startStandbyServices() throws IOException {
      try {
        namesystem.startStandbyServices(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void prepareToStopStandbyServices() throws ServiceFailedException {
      try {
        namesystem.prepareToStopStandbyServices();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopStandbyServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopStandbyServices();
        }
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

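    // Take the namesystem write lock before the retry cache lock, and
    // release them in the reverse order, so the lock ordering is consistent.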
    @Override
    public void writeLock() {
      namesystem.writeLock();
      namesystem.lockRetryCache();
    }

    @Override
    public void writeUnlock() {
      namesystem.unlockRetryCache();
      namesystem.writeUnlock();
    }

    /** Check if an operation of the given category is allowed. */
    @Override
    public void checkOperation(final OperationCategory op)
        throws StandbyException {
      state.checkOperation(haContext, op);
    }

    @Override
    public boolean allowStaleReads() {
      return allowStaleStandbyReads;
    }

  }

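  /** @return true if this NameNode is currently in the standby state */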
  public boolean isStandbyState() {
    return (state.equals(STANDBY_STATE));
  }

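  /** @return true if this NameNode is currently in the active state */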
  public boolean isActiveState() {
    return (state.equals(ACTIVE_STATE));
  }

  /**
   * Check that a request to change this node's HA state is valid.
   * In particular, verifies that, if auto failover is enabled, non-forced
   * requests from the HAAdmin CLI are rejected, and that, if it is disabled,
   * requests from the ZK failover controller are rejected.
   *
   * @param req the request to check
   * @throws AccessControlException if the request is disallowed
   */
  void checkHaStateChange(StateChangeRequestInfo req)
      throws AccessControlException {
    boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
        DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
    switch (req.getSource()) {
    case REQUEST_BY_USER:
      if (autoHaEnabled) {
        throw new AccessControlException(
            "Manual HA control for this NameNode is disallowed, because " +
            "automatic HA is enabled.");
      }
      break;
    case REQUEST_BY_USER_FORCED:
      if (autoHaEnabled) {
        LOG.warn("Allowing manual HA control from " +
            Server.getRemoteAddress() +
            " even though automatic HA is enabled, because the user " +
            "specified the force flag");
      }
      break;
    case REQUEST_BY_ZKFC:
      if (!autoHaEnabled) {
        throw new AccessControlException(
            "Request from ZK failover controller at " +
            Server.getRemoteAddress() + " denied since automatic HA " +
            "is not enabled");
      }
      break;
    }
  }
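
  /*
   * Sketch of the intended outcomes of the check above when automatic
   * failover is enabled (the service ID "nn1" is a hypothetical example):
   *
   *   hdfs haadmin -transitionToActive nn1                -> rejected
   *   hdfs haadmin -transitionToActive --forcemanual nn1  -> allowed, warned
   *   automatic transition requested by the ZKFC          -> allowed
   */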
}