001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import com.google.common.annotations.VisibleForTesting; 021import com.google.common.base.Joiner; 022import com.google.common.base.Preconditions; 023import com.google.common.collect.Lists; 024 025import org.apache.commons.logging.Log; 026import org.apache.commons.logging.LogFactory; 027import org.apache.hadoop.HadoopIllegalArgumentException; 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Trash; 032import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 033import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; 034import org.apache.hadoop.ha.HAServiceStatus; 035import org.apache.hadoop.ha.HealthCheckFailedException; 036import org.apache.hadoop.ha.ServiceFailedException; 037import org.apache.hadoop.hdfs.DFSConfigKeys; 038import org.apache.hadoop.hdfs.DFSUtil; 039import org.apache.hadoop.hdfs.HAUtil; 040import org.apache.hadoop.hdfs.HdfsConfiguration; 041import org.apache.hadoop.hdfs.protocol.ClientProtocol; 042import org.apache.hadoop.hdfs.protocol.HdfsConstants; 043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 045import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 046import org.apache.hadoop.hdfs.server.namenode.ha.*; 047import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 048import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 049import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; 050import org.apache.hadoop.hdfs.server.protocol.*; 051import org.apache.hadoop.ipc.Server; 052import org.apache.hadoop.ipc.StandbyException; 053import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 054import org.apache.hadoop.metrics2.util.MBeans; 055import org.apache.hadoop.net.NetUtils; 056import org.apache.hadoop.security.AccessControlException; 057import org.apache.hadoop.security.RefreshUserMappingsProtocol; 058import org.apache.hadoop.security.SecurityUtil; 059import org.apache.hadoop.security.UserGroupInformation; 060import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; 061import org.apache.hadoop.ipc.RefreshCallQueueProtocol; 062import org.apache.hadoop.tools.GetUserMappingsProtocol; 063import org.apache.hadoop.util.ExitUtil.ExitException; 064import org.apache.hadoop.util.JvmPauseMonitor; 065import org.apache.hadoop.util.ServicePlugin; 066import org.apache.hadoop.util.StringUtils; 067 068import javax.management.ObjectName; 069 070import java.io.IOException; 071import java.io.PrintStream; 072import java.net.InetSocketAddress; 073import java.net.URI; 074import java.security.PrivilegedExceptionAction; 075import java.util.ArrayList; 076import java.util.Arrays; 077import java.util.Collection; 078import java.util.List; 079 080import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 081import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 082import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 083import static org.apache.hadoop.hdfs.DFSConfigKeys.*; 084import static org.apache.hadoop.util.ExitUtil.terminate; 085import static org.apache.hadoop.util.ToolRunner.confirmPrompt; 086 087/********************************************************** 088 * NameNode serves as both directory namespace manager and 089 * "inode table" for the Hadoop DFS. There is a single NameNode 090 * running in any DFS deployment. (Well, except when there 091 * is a second backup/failover NameNode, or when using federated NameNodes.) 092 * 093 * The NameNode controls two critical tables: 094 * 1) filename->blocksequence (namespace) 095 * 2) block->machinelist ("inodes") 096 * 097 * The first table is stored on disk and is very precious. 098 * The second table is rebuilt every time the NameNode comes up. 099 * 100 * 'NameNode' refers to both this class as well as the 'NameNode server'. 101 * The 'FSNamesystem' class actually performs most of the filesystem 102 * management. The majority of the 'NameNode' class itself is concerned 103 * with exposing the IPC interface and the HTTP server to the outside world, 104 * plus some configuration management. 105 * 106 * NameNode implements the 107 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which 108 * allows clients to ask for DFS services. 109 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for 110 * direct use by authors of DFS client code. End-users should instead use the 111 * {@link org.apache.hadoop.fs.FileSystem} class. 112 * 113 * NameNode also implements the 114 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface, 115 * used by DataNodes that actually store DFS data blocks. These 116 * methods are invoked repeatedly and automatically by all the 117 * DataNodes in a DFS deployment. 118 * 119 * NameNode also implements the 120 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface, 121 * used by secondary namenodes or rebalancing processes to get partial 122 * NameNode state, for example partial blocksMap etc. 123 **********************************************************/ 124@InterfaceAudience.Private 125public class NameNode implements NameNodeStatusMXBean { 126 static{ 127 HdfsConfiguration.init(); 128 } 129 130 /** 131 * Categories of operations supported by the namenode. 132 */ 133 public static enum OperationCategory { 134 /** Operations that are state agnostic */ 135 UNCHECKED, 136 /** Read operation that does not change the namespace state */ 137 READ, 138 /** Write operation that changes the namespace state */ 139 WRITE, 140 /** Operations related to checkpointing */ 141 CHECKPOINT, 142 /** Operations related to {@link JournalProtocol} */ 143 JOURNAL 144 } 145 146 /** 147 * HDFS configuration can have three types of parameters: 148 * <ol> 149 * <li>Parameters that are common for all the name services in the cluster.</li> 150 * <li>Parameters that are specific to a name service. These keys are suffixed 151 * with nameserviceId in the configuration. For example, 152 * "dfs.namenode.rpc-address.nameservice1".</li> 153 * <li>Parameters that are specific to a single name node. These keys are suffixed 154 * with nameserviceId and namenodeId in the configuration. for example, 155 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li> 156 * </ol> 157 * 158 * In the latter cases, operators may specify the configuration without 159 * any suffix, with a nameservice suffix, or with a nameservice and namenode 160 * suffix. The more specific suffix will take precedence. 161 * 162 * These keys are specific to a given namenode, and thus may be configured 163 * globally, for a nameservice, or for a specific namenode within a nameservice. 164 */ 165 public static final String[] NAMENODE_SPECIFIC_KEYS = { 166 DFS_NAMENODE_RPC_ADDRESS_KEY, 167 DFS_NAMENODE_RPC_BIND_HOST_KEY, 168 DFS_NAMENODE_NAME_DIR_KEY, 169 DFS_NAMENODE_EDITS_DIR_KEY, 170 DFS_NAMENODE_SHARED_EDITS_DIR_KEY, 171 DFS_NAMENODE_CHECKPOINT_DIR_KEY, 172 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY, 173 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, 174 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, 175 DFS_NAMENODE_HTTP_ADDRESS_KEY, 176 DFS_NAMENODE_HTTPS_ADDRESS_KEY, 177 DFS_NAMENODE_HTTP_BIND_HOST_KEY, 178 DFS_NAMENODE_HTTPS_BIND_HOST_KEY, 179 DFS_NAMENODE_KEYTAB_FILE_KEY, 180 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, 181 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY, 182 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, 183 DFS_NAMENODE_BACKUP_ADDRESS_KEY, 184 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, 185 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY, 186 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, 187 DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY, 188 DFS_HA_FENCE_METHODS_KEY, 189 DFS_HA_ZKFC_PORT_KEY, 190 DFS_HA_FENCE_METHODS_KEY 191 }; 192 193 /** 194 * @see #NAMENODE_SPECIFIC_KEYS 195 * These keys are specific to a nameservice, but may not be overridden 196 * for a specific namenode. 197 */ 198 public static final String[] NAMESERVICE_SPECIFIC_KEYS = { 199 DFS_HA_AUTO_FAILOVER_ENABLED_KEY 200 }; 201 202 private static final String USAGE = "Usage: java NameNode [" 203 + StartupOption.BACKUP.getName() + "] | \n\t[" 204 + StartupOption.CHECKPOINT.getName() + "] | \n\t[" 205 + StartupOption.FORMAT.getName() + " [" 206 + StartupOption.CLUSTERID.getName() + " cid ] [" 207 + StartupOption.FORCE.getName() + "] [" 208 + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t[" 209 + StartupOption.UPGRADE.getName() + 210 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 211 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t[" 212 + StartupOption.ROLLBACK.getName() + "] | \n\t[" 213 + StartupOption.ROLLINGUPGRADE.getName() + " <" 214 + RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|" 215 + RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | \n\t[" 216 + StartupOption.FINALIZE.getName() + "] | \n\t[" 217 + StartupOption.IMPORT.getName() + "] | \n\t[" 218 + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t[" 219 + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t[" 220 + StartupOption.RECOVER.getName() + " [ " 221 + StartupOption.FORCE.getName() + "] ] | \n\t[" 222 + StartupOption.METADATAVERSION.getName() + " ] " 223 + " ]"; 224 225 226 public long getProtocolVersion(String protocol, 227 long clientVersion) throws IOException { 228 if (protocol.equals(ClientProtocol.class.getName())) { 229 return ClientProtocol.versionID; 230 } else if (protocol.equals(DatanodeProtocol.class.getName())){ 231 return DatanodeProtocol.versionID; 232 } else if (protocol.equals(NamenodeProtocol.class.getName())){ 233 return NamenodeProtocol.versionID; 234 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){ 235 return RefreshAuthorizationPolicyProtocol.versionID; 236 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){ 237 return RefreshUserMappingsProtocol.versionID; 238 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) { 239 return RefreshCallQueueProtocol.versionID; 240 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ 241 return GetUserMappingsProtocol.versionID; 242 } else { 243 throw new IOException("Unknown protocol to name node: " + protocol); 244 } 245 } 246 247 public static final int DEFAULT_PORT = 8020; 248 public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); 249 public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"); 250 public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange"); 251 public static final HAState ACTIVE_STATE = new ActiveState(); 252 public static final HAState STANDBY_STATE = new StandbyState(); 253 254 protected FSNamesystem namesystem; 255 protected final Configuration conf; 256 protected final NamenodeRole role; 257 private volatile HAState state; 258 private final boolean haEnabled; 259 private final HAContext haContext; 260 protected final boolean allowStaleStandbyReads; 261 262 263 /** httpServer */ 264 protected NameNodeHttpServer httpServer; 265 private Thread emptier; 266 /** only used for testing purposes */ 267 protected boolean stopRequested = false; 268 /** Registration information of this name-node */ 269 protected NamenodeRegistration nodeRegistration; 270 /** Activated plug-ins. */ 271 private List<ServicePlugin> plugins; 272 273 private NameNodeRpcServer rpcServer; 274 275 private JvmPauseMonitor pauseMonitor; 276 private ObjectName nameNodeStatusBeanName; 277 /** 278 * The namenode address that clients will use to access this namenode 279 * or the name service. For HA configurations using logical URI, it 280 * will be the logical address. 281 */ 282 private String clientNamenodeAddress; 283 284 /** Format a new filesystem. Destroys any filesystem that may already 285 * exist at this location. **/ 286 public static void format(Configuration conf) throws IOException { 287 format(conf, true, true); 288 } 289 290 static NameNodeMetrics metrics; 291 private static final StartupProgress startupProgress = new StartupProgress(); 292 /** Return the {@link FSNamesystem} object. 293 * @return {@link FSNamesystem} object. 294 */ 295 public FSNamesystem getNamesystem() { 296 return namesystem; 297 } 298 299 public NamenodeProtocols getRpcServer() { 300 return rpcServer; 301 } 302 303 static void initMetrics(Configuration conf, NamenodeRole role) { 304 metrics = NameNodeMetrics.create(conf, role); 305 } 306 307 public static NameNodeMetrics getNameNodeMetrics() { 308 return metrics; 309 } 310 311 /** 312 * Returns object used for reporting namenode startup progress. 313 * 314 * @return StartupProgress for reporting namenode startup progress 315 */ 316 public static StartupProgress getStartupProgress() { 317 return startupProgress; 318 } 319 320 /** 321 * Return the service name of the issued delegation token. 322 * 323 * @return The name service id in HA-mode, or the rpc address in non-HA mode 324 */ 325 public String getTokenServiceName() { 326 return getClientNamenodeAddress(); 327 } 328 329 /** 330 * Set the namenode address that will be used by clients to access this 331 * namenode or name service. This needs to be called before the config 332 * is overriden. 333 */ 334 public void setClientNamenodeAddress(Configuration conf) { 335 String nnAddr = conf.get(FS_DEFAULT_NAME_KEY); 336 if (nnAddr == null) { 337 // default fs is not set. 338 clientNamenodeAddress = null; 339 return; 340 } 341 342 LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr); 343 URI nnUri = URI.create(nnAddr); 344 345 String nnHost = nnUri.getHost(); 346 if (nnHost == null) { 347 clientNamenodeAddress = null; 348 return; 349 } 350 351 if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) { 352 // host name is logical 353 clientNamenodeAddress = nnHost; 354 } else if (nnUri.getPort() > 0) { 355 // physical address with a valid port 356 clientNamenodeAddress = nnUri.getAuthority(); 357 } else { 358 // the port is missing or 0. Figure out real bind address later. 359 clientNamenodeAddress = null; 360 return; 361 } 362 LOG.info("Clients are to use " + clientNamenodeAddress + " to access" 363 + " this namenode/service."); 364 } 365 366 /** 367 * Get the namenode address to be used by clients. 368 * @return nn address 369 */ 370 public String getClientNamenodeAddress() { 371 return clientNamenodeAddress; 372 } 373 374 public static InetSocketAddress getAddress(String address) { 375 return NetUtils.createSocketAddr(address, DEFAULT_PORT); 376 } 377 378 /** 379 * Set the configuration property for the service rpc address 380 * to address 381 */ 382 public static void setServiceAddress(Configuration conf, 383 String address) { 384 LOG.info("Setting ADDRESS " + address); 385 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address); 386 } 387 388 /** 389 * Fetches the address for services to use when connecting to namenode 390 * based on the value of fallback returns null if the special 391 * address is not specified or returns the default namenode address 392 * to be used by both clients and services. 393 * Services here are datanodes, backup node, any non client connection 394 */ 395 public static InetSocketAddress getServiceAddress(Configuration conf, 396 boolean fallback) { 397 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); 398 if (addr == null || addr.isEmpty()) { 399 return fallback ? getAddress(conf) : null; 400 } 401 return getAddress(addr); 402 } 403 404 public static InetSocketAddress getAddress(Configuration conf) { 405 URI filesystemURI = FileSystem.getDefaultUri(conf); 406 return getAddress(filesystemURI); 407 } 408 409 410 /** 411 * @return address of file system 412 */ 413 public static InetSocketAddress getAddress(URI filesystemURI) { 414 String authority = filesystemURI.getAuthority(); 415 if (authority == null) { 416 throw new IllegalArgumentException(String.format( 417 "Invalid URI for NameNode address (check %s): %s has no authority.", 418 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString())); 419 } 420 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase( 421 filesystemURI.getScheme())) { 422 throw new IllegalArgumentException(String.format( 423 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.", 424 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(), 425 HdfsConstants.HDFS_URI_SCHEME)); 426 } 427 return getAddress(authority); 428 } 429 430 public static URI getUri(InetSocketAddress namenode) { 431 int port = namenode.getPort(); 432 String portString = port == DEFAULT_PORT ? "" : (":"+port); 433 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 434 + namenode.getHostName()+portString); 435 } 436 437 // 438 // Common NameNode methods implementation for the active name-node role. 439 // 440 public NamenodeRole getRole() { 441 return role; 442 } 443 444 boolean isRole(NamenodeRole that) { 445 return role.equals(that); 446 } 447 448 /** 449 * Given a configuration get the address of the service rpc server 450 * If the service rpc is not configured returns null 451 */ 452 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { 453 return NameNode.getServiceAddress(conf, false); 454 } 455 456 protected InetSocketAddress getRpcServerAddress(Configuration conf) { 457 return getAddress(conf); 458 } 459 460 /** Given a configuration get the bind host of the service rpc server 461 * If the bind host is not configured returns null. 462 */ 463 protected String getServiceRpcServerBindHost(Configuration conf) { 464 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY); 465 if (addr == null || addr.isEmpty()) { 466 return null; 467 } 468 return addr; 469 } 470 471 /** Given a configuration get the bind host of the client rpc server 472 * If the bind host is not configured returns null. 473 */ 474 protected String getRpcServerBindHost(Configuration conf) { 475 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY); 476 if (addr == null || addr.isEmpty()) { 477 return null; 478 } 479 return addr; 480 } 481 482 /** 483 * Modifies the configuration passed to contain the service rpc address setting 484 */ 485 protected void setRpcServiceServerAddress(Configuration conf, 486 InetSocketAddress serviceRPCAddress) { 487 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress)); 488 } 489 490 protected void setRpcServerAddress(Configuration conf, 491 InetSocketAddress rpcAddress) { 492 FileSystem.setDefaultUri(conf, getUri(rpcAddress)); 493 } 494 495 protected InetSocketAddress getHttpServerAddress(Configuration conf) { 496 return getHttpAddress(conf); 497 } 498 499 /** 500 * HTTP server address for binding the endpoint. This method is 501 * for use by the NameNode and its derivatives. It may return 502 * a different address than the one that should be used by clients to 503 * connect to the NameNode. See 504 * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY} 505 * 506 * @param conf 507 * @return 508 */ 509 protected InetSocketAddress getHttpServerBindAddress(Configuration conf) { 510 InetSocketAddress bindAddress = getHttpServerAddress(conf); 511 512 // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the 513 // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY. 514 final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY); 515 if (bindHost != null && !bindHost.isEmpty()) { 516 bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort()); 517 } 518 519 return bindAddress; 520 } 521 522 /** @return the NameNode HTTP address. */ 523 public static InetSocketAddress getHttpAddress(Configuration conf) { 524 return NetUtils.createSocketAddr( 525 conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT)); 526 } 527 528 protected void loadNamesystem(Configuration conf) throws IOException { 529 this.namesystem = FSNamesystem.loadFromDisk(conf); 530 } 531 532 NamenodeRegistration getRegistration() { 533 return nodeRegistration; 534 } 535 536 NamenodeRegistration setRegistration() { 537 nodeRegistration = new NamenodeRegistration( 538 NetUtils.getHostPortString(rpcServer.getRpcAddress()), 539 NetUtils.getHostPortString(getHttpAddress()), 540 getFSImage().getStorage(), getRole()); 541 return nodeRegistration; 542 } 543 544 /* optimize ugi lookup for RPC operations to avoid a trip through 545 * UGI.getCurrentUser which is synch'ed 546 */ 547 public static UserGroupInformation getRemoteUser() throws IOException { 548 UserGroupInformation ugi = Server.getRemoteUser(); 549 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser(); 550 } 551 552 553 /** 554 * Login as the configured user for the NameNode. 555 */ 556 void loginAsNameNodeUser(Configuration conf) throws IOException { 557 InetSocketAddress socAddr = getRpcServerAddress(conf); 558 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 559 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 560 } 561 562 /** 563 * Initialize name-node. 564 * 565 * @param conf the configuration 566 */ 567 protected void initialize(Configuration conf) throws IOException { 568 if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) { 569 String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY); 570 if (intervals != null) { 571 conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS, 572 intervals); 573 } 574 } 575 576 UserGroupInformation.setConfiguration(conf); 577 loginAsNameNodeUser(conf); 578 579 NameNode.initMetrics(conf, this.getRole()); 580 StartupProgressMetrics.register(startupProgress); 581 582 if (NamenodeRole.NAMENODE == role) { 583 startHttpServer(conf); 584 } 585 loadNamesystem(conf); 586 587 rpcServer = createRpcServer(conf); 588 if (clientNamenodeAddress == null) { 589 // This is expected for MiniDFSCluster. Set it now using 590 // the RPC server's bind address. 591 clientNamenodeAddress = 592 NetUtils.getHostPortString(rpcServer.getRpcAddress()); 593 LOG.info("Clients are to use " + clientNamenodeAddress + " to access" 594 + " this namenode/service."); 595 } 596 if (NamenodeRole.NAMENODE == role) { 597 httpServer.setNameNodeAddress(getNameNodeAddress()); 598 httpServer.setFSImage(getFSImage()); 599 } 600 601 pauseMonitor = new JvmPauseMonitor(conf); 602 pauseMonitor.start(); 603 metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); 604 605 startCommonServices(conf); 606 } 607 608 /** 609 * Create the RPC server implementation. Used as an extension point for the 610 * BackupNode. 611 */ 612 protected NameNodeRpcServer createRpcServer(Configuration conf) 613 throws IOException { 614 return new NameNodeRpcServer(conf, this); 615 } 616 617 /** Start the services common to active and standby states */ 618 private void startCommonServices(Configuration conf) throws IOException { 619 namesystem.startCommonServices(conf, haContext); 620 registerNNSMXBean(); 621 if (NamenodeRole.NAMENODE != role) { 622 startHttpServer(conf); 623 httpServer.setNameNodeAddress(getNameNodeAddress()); 624 httpServer.setFSImage(getFSImage()); 625 } 626 rpcServer.start(); 627 plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, 628 ServicePlugin.class); 629 for (ServicePlugin p: plugins) { 630 try { 631 p.start(this); 632 } catch (Throwable t) { 633 LOG.warn("ServicePlugin " + p + " could not be started", t); 634 } 635 } 636 LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress()); 637 if (rpcServer.getServiceRpcAddress() != null) { 638 LOG.info(getRole() + " service RPC up at: " 639 + rpcServer.getServiceRpcAddress()); 640 } 641 } 642 643 private void stopCommonServices() { 644 if(rpcServer != null) rpcServer.stop(); 645 if(namesystem != null) namesystem.close(); 646 if (pauseMonitor != null) pauseMonitor.stop(); 647 if (plugins != null) { 648 for (ServicePlugin p : plugins) { 649 try { 650 p.stop(); 651 } catch (Throwable t) { 652 LOG.warn("ServicePlugin " + p + " could not be stopped", t); 653 } 654 } 655 } 656 stopHttpServer(); 657 } 658 659 private void startTrashEmptier(final Configuration conf) throws IOException { 660 long trashInterval = 661 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT); 662 if (trashInterval == 0) { 663 return; 664 } else if (trashInterval < 0) { 665 throw new IOException("Cannot start trash emptier with negative interval." 666 + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value."); 667 } 668 669 // This may be called from the transitionToActive code path, in which 670 // case the current user is the administrator, not the NN. The trash 671 // emptier needs to run as the NN. See HDFS-3972. 672 FileSystem fs = SecurityUtil.doAsLoginUser( 673 new PrivilegedExceptionAction<FileSystem>() { 674 @Override 675 public FileSystem run() throws IOException { 676 return FileSystem.get(conf); 677 } 678 }); 679 this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier"); 680 this.emptier.setDaemon(true); 681 this.emptier.start(); 682 } 683 684 private void stopTrashEmptier() { 685 if (this.emptier != null) { 686 emptier.interrupt(); 687 emptier = null; 688 } 689 } 690 691 private void startHttpServer(final Configuration conf) throws IOException { 692 httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf)); 693 httpServer.start(); 694 httpServer.setStartupProgress(startupProgress); 695 } 696 697 private void stopHttpServer() { 698 try { 699 if (httpServer != null) httpServer.stop(); 700 } catch (Exception e) { 701 LOG.error("Exception while stopping httpserver", e); 702 } 703 } 704 705 /** 706 * Start NameNode. 707 * <p> 708 * The name-node can be started with one of the following startup options: 709 * <ul> 710 * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li> 711 * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li> 712 * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li> 713 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li> 714 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster 715 * upgrade and create a snapshot of the current file system state</li> 716 * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node 717 * metadata</li> 718 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the 719 * cluster back to the previous state</li> 720 * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 721 * previous upgrade</li> 722 * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li> 723 * </ul> 724 * The option is passed via configuration field: 725 * <tt>dfs.namenode.startup</tt> 726 * 727 * The conf will be modified to reflect the actual ports on which 728 * the NameNode is up and running if the user passes the port as 729 * <code>zero</code> in the conf. 730 * 731 * @param conf confirguration 732 * @throws IOException 733 */ 734 public NameNode(Configuration conf) throws IOException { 735 this(conf, NamenodeRole.NAMENODE); 736 } 737 738 protected NameNode(Configuration conf, NamenodeRole role) 739 throws IOException { 740 this.conf = conf; 741 this.role = role; 742 setClientNamenodeAddress(conf); 743 String nsId = getNameServiceId(conf); 744 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 745 this.haEnabled = HAUtil.isHAEnabled(conf, nsId); 746 state = createHAState(getStartupOption(conf)); 747 this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); 748 this.haContext = createHAContext(); 749 try { 750 initializeGenericKeys(conf, nsId, namenodeId); 751 initialize(conf); 752 try { 753 haContext.writeLock(); 754 state.prepareToEnterState(haContext); 755 state.enterState(haContext); 756 } finally { 757 haContext.writeUnlock(); 758 } 759 } catch (IOException e) { 760 this.stop(); 761 throw e; 762 } catch (HadoopIllegalArgumentException e) { 763 this.stop(); 764 throw e; 765 } 766 } 767 768 protected HAState createHAState(StartupOption startOpt) { 769 if (!haEnabled || startOpt == StartupOption.UPGRADE) { 770 return ACTIVE_STATE; 771 } else { 772 return STANDBY_STATE; 773 } 774 } 775 776 protected HAContext createHAContext() { 777 return new NameNodeHAContext(); 778 } 779 780 /** 781 * Wait for service to finish. 782 * (Normally, it runs forever.) 783 */ 784 public void join() { 785 try { 786 rpcServer.join(); 787 } catch (InterruptedException ie) { 788 LOG.info("Caught interrupted exception ", ie); 789 } 790 } 791 792 /** 793 * Stop all NameNode threads and wait for all to finish. 794 */ 795 public void stop() { 796 synchronized(this) { 797 if (stopRequested) 798 return; 799 stopRequested = true; 800 } 801 try { 802 if (state != null) { 803 state.exitState(haContext); 804 } 805 } catch (ServiceFailedException e) { 806 LOG.warn("Encountered exception while exiting state ", e); 807 } finally { 808 stopCommonServices(); 809 if (metrics != null) { 810 metrics.shutdown(); 811 } 812 if (namesystem != null) { 813 namesystem.shutdown(); 814 } 815 if (nameNodeStatusBeanName != null) { 816 MBeans.unregister(nameNodeStatusBeanName); 817 nameNodeStatusBeanName = null; 818 } 819 } 820 } 821 822 synchronized boolean isStopRequested() { 823 return stopRequested; 824 } 825 826 /** 827 * Is the cluster currently in safe mode? 828 */ 829 public boolean isInSafeMode() { 830 return namesystem.isInSafeMode(); 831 } 832 833 /** get FSImage */ 834 @VisibleForTesting 835 public FSImage getFSImage() { 836 return namesystem.getFSImage(); 837 } 838 839 /** 840 * @return NameNode RPC address 841 */ 842 public InetSocketAddress getNameNodeAddress() { 843 return rpcServer.getRpcAddress(); 844 } 845 846 /** 847 * @return NameNode RPC address in "host:port" string form 848 */ 849 public String getNameNodeAddressHostPortString() { 850 return NetUtils.getHostPortString(rpcServer.getRpcAddress()); 851 } 852 853 /** 854 * @return NameNode service RPC address if configured, the 855 * NameNode RPC address otherwise 856 */ 857 public InetSocketAddress getServiceRpcAddress() { 858 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress(); 859 return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr; 860 } 861 862 /** 863 * @return NameNode HTTP address, used by the Web UI, image transfer, 864 * and HTTP-based file system clients like Hftp and WebHDFS 865 */ 866 public InetSocketAddress getHttpAddress() { 867 return httpServer.getHttpAddress(); 868 } 869 870 /** 871 * @return NameNode HTTPS address, used by the Web UI, image transfer, 872 * and HTTP-based file system clients like Hftp and WebHDFS 873 */ 874 public InetSocketAddress getHttpsAddress() { 875 return httpServer.getHttpsAddress(); 876 } 877 878 /** 879 * Verify that configured directories exist, then 880 * Interactively confirm that formatting is desired 881 * for each existing directory and format them. 882 * 883 * @param conf configuration to use 884 * @param force if true, format regardless of whether dirs exist 885 * @return true if formatting was aborted, false otherwise 886 * @throws IOException 887 */ 888 private static boolean format(Configuration conf, boolean force, 889 boolean isInteractive) throws IOException { 890 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 891 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 892 initializeGenericKeys(conf, nsId, namenodeId); 893 checkAllowFormat(conf); 894 895 if (UserGroupInformation.isSecurityEnabled()) { 896 InetSocketAddress socAddr = getAddress(conf); 897 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 898 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 899 } 900 901 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf); 902 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf); 903 List<URI> dirsToPrompt = new ArrayList<URI>(); 904 dirsToPrompt.addAll(nameDirsToFormat); 905 dirsToPrompt.addAll(sharedDirs); 906 List<URI> editDirsToFormat = 907 FSNamesystem.getNamespaceEditsDirs(conf); 908 909 // if clusterID is not provided - see if you can find the current one 910 String clusterId = StartupOption.FORMAT.getClusterId(); 911 if(clusterId == null || clusterId.equals("")) { 912 //Generate a new cluster id 913 clusterId = NNStorage.newClusterID(); 914 } 915 System.out.println("Formatting using clusterid: " + clusterId); 916 917 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat); 918 try { 919 FSNamesystem fsn = new FSNamesystem(conf, fsImage); 920 fsImage.getEditLog().initJournalsForWrite(); 921 922 if (!fsImage.confirmFormat(force, isInteractive)) { 923 return true; // aborted 924 } 925 926 fsImage.format(fsn, clusterId); 927 } catch (IOException ioe) { 928 LOG.warn("Encountered exception during format: ", ioe); 929 fsImage.close(); 930 throw ioe; 931 } 932 return false; 933 } 934 935 public static void checkAllowFormat(Configuration conf) throws IOException { 936 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 937 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) { 938 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY 939 + " is set to false for this filesystem, so it " 940 + "cannot be formatted. You will need to set " 941 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter " 942 + "to true in order to format this filesystem"); 943 } 944 } 945 946 @VisibleForTesting 947 public static boolean initializeSharedEdits(Configuration conf) throws IOException { 948 return initializeSharedEdits(conf, true); 949 } 950 951 @VisibleForTesting 952 public static boolean initializeSharedEdits(Configuration conf, 953 boolean force) throws IOException { 954 return initializeSharedEdits(conf, force, false); 955 } 956 957 /** 958 * Clone the supplied configuration but remove the shared edits dirs. 959 * 960 * @param conf Supplies the original configuration. 961 * @return Cloned configuration without the shared edit dirs. 962 * @throws IOException on failure to generate the configuration. 963 */ 964 private static Configuration getConfigurationWithoutSharedEdits( 965 Configuration conf) 966 throws IOException { 967 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false); 968 String editsDirsString = Joiner.on(",").join(editsDirs); 969 970 Configuration confWithoutShared = new Configuration(conf); 971 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 972 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, 973 editsDirsString); 974 return confWithoutShared; 975 } 976 977 /** 978 * Format a new shared edits dir and copy in enough edit log segments so that 979 * the standby NN can start up. 980 * 981 * @param conf configuration 982 * @param force format regardless of whether or not the shared edits dir exists 983 * @param interactive prompt the user when a dir exists 984 * @return true if the command aborts, false otherwise 985 */ 986 private static boolean initializeSharedEdits(Configuration conf, 987 boolean force, boolean interactive) throws IOException { 988 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 989 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 990 initializeGenericKeys(conf, nsId, namenodeId); 991 992 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) { 993 LOG.fatal("No shared edits directory configured for namespace " + 994 nsId + " namenode " + namenodeId); 995 return false; 996 } 997 998 if (UserGroupInformation.isSecurityEnabled()) { 999 InetSocketAddress socAddr = getAddress(conf); 1000 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 1001 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 1002 } 1003 1004 NNStorage existingStorage = null; 1005 FSImage sharedEditsImage = null; 1006 try { 1007 FSNamesystem fsns = 1008 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf)); 1009 1010 existingStorage = fsns.getFSImage().getStorage(); 1011 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo(); 1012 1013 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); 1014 1015 sharedEditsImage = new FSImage(conf, 1016 Lists.<URI>newArrayList(), 1017 sharedEditsDirs); 1018 sharedEditsImage.getEditLog().initJournalsForWrite(); 1019 1020 if (!sharedEditsImage.confirmFormat(force, interactive)) { 1021 return true; // abort 1022 } 1023 1024 NNStorage newSharedStorage = sharedEditsImage.getStorage(); 1025 // Call Storage.format instead of FSImage.format here, since we don't 1026 // actually want to save a checkpoint - just prime the dirs with 1027 // the existing namespace info 1028 newSharedStorage.format(nsInfo); 1029 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo); 1030 1031 // Need to make sure the edit log segments are in good shape to initialize 1032 // the shared edits dir. 1033 fsns.getFSImage().getEditLog().close(); 1034 fsns.getFSImage().getEditLog().initJournalsForWrite(); 1035 fsns.getFSImage().getEditLog().recoverUnclosedStreams(); 1036 1037 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage, 1038 conf); 1039 } catch (IOException ioe) { 1040 LOG.error("Could not initialize shared edits dir", ioe); 1041 return true; // aborted 1042 } finally { 1043 if (sharedEditsImage != null) { 1044 try { 1045 sharedEditsImage.close(); 1046 } catch (IOException ioe) { 1047 LOG.warn("Could not close sharedEditsImage", ioe); 1048 } 1049 } 1050 // Have to unlock storage explicitly for the case when we're running in a 1051 // unit test, which runs in the same JVM as NNs. 1052 if (existingStorage != null) { 1053 try { 1054 existingStorage.unlockAll(); 1055 } catch (IOException ioe) { 1056 LOG.warn("Could not unlock storage directories", ioe); 1057 return true; // aborted 1058 } 1059 } 1060 } 1061 return false; // did not abort 1062 } 1063 1064 private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns, 1065 Collection<URI> sharedEditsDirs, NNStorage newSharedStorage, 1066 Configuration conf) throws IOException { 1067 Preconditions.checkArgument(!sharedEditsDirs.isEmpty(), 1068 "No shared edits specified"); 1069 // Copy edit log segments into the new shared edits dir. 1070 List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs); 1071 FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage, 1072 sharedEditsUris); 1073 newSharedEditLog.initJournalsForWrite(); 1074 newSharedEditLog.recoverUnclosedStreams(); 1075 1076 FSEditLog sourceEditLog = fsns.getFSImage().editLog; 1077 1078 long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId(); 1079 1080 Collection<EditLogInputStream> streams = null; 1081 try { 1082 streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0); 1083 1084 // Set the nextTxid to the CheckpointTxId+1 1085 newSharedEditLog.setNextTxId(fromTxId + 1); 1086 1087 // Copy all edits after last CheckpointTxId to shared edits dir 1088 for (EditLogInputStream stream : streams) { 1089 LOG.debug("Beginning to copy stream " + stream + " to shared edits"); 1090 FSEditLogOp op; 1091 boolean segmentOpen = false; 1092 while ((op = stream.readOp()) != null) { 1093 if (LOG.isTraceEnabled()) { 1094 LOG.trace("copying op: " + op); 1095 } 1096 if (!segmentOpen) { 1097 newSharedEditLog.startLogSegment(op.txid, false); 1098 segmentOpen = true; 1099 } 1100 1101 newSharedEditLog.logEdit(op); 1102 1103 if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) { 1104 newSharedEditLog.logSync(); 1105 newSharedEditLog.endCurrentLogSegment(false); 1106 LOG.debug("ending log segment because of END_LOG_SEGMENT op in " 1107 + stream); 1108 segmentOpen = false; 1109 } 1110 } 1111 1112 if (segmentOpen) { 1113 LOG.debug("ending log segment because of end of stream in " + stream); 1114 newSharedEditLog.logSync(); 1115 newSharedEditLog.endCurrentLogSegment(false); 1116 segmentOpen = false; 1117 } 1118 } 1119 } finally { 1120 if (streams != null) { 1121 FSEditLog.closeAllStreams(streams); 1122 } 1123 } 1124 } 1125 1126 @VisibleForTesting 1127 public static boolean doRollback(Configuration conf, 1128 boolean isConfirmationNeeded) throws IOException { 1129 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1130 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1131 initializeGenericKeys(conf, nsId, namenodeId); 1132 1133 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf)); 1134 System.err.print( 1135 "\"rollBack\" will remove the current state of the file system,\n" 1136 + "returning you to the state prior to initiating your recent.\n" 1137 + "upgrade. This action is permanent and cannot be undone. If you\n" 1138 + "are performing a rollback in an HA environment, you should be\n" 1139 + "certain that no NameNode process is running on any host."); 1140 if (isConfirmationNeeded) { 1141 if (!confirmPrompt("Roll back file system state?")) { 1142 System.err.println("Rollback aborted."); 1143 return true; 1144 } 1145 } 1146 nsys.getFSImage().doRollback(nsys); 1147 return false; 1148 } 1149 1150 private static void printUsage(PrintStream out) { 1151 out.println(USAGE + "\n"); 1152 } 1153 1154 @VisibleForTesting 1155 static StartupOption parseArguments(String args[]) { 1156 int argsLen = (args == null) ? 0 : args.length; 1157 StartupOption startOpt = StartupOption.REGULAR; 1158 for(int i=0; i < argsLen; i++) { 1159 String cmd = args[i]; 1160 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { 1161 startOpt = StartupOption.FORMAT; 1162 for (i = i + 1; i < argsLen; i++) { 1163 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1164 i++; 1165 if (i >= argsLen) { 1166 // if no cluster id specified, return null 1167 LOG.fatal("Must specify a valid cluster ID after the " 1168 + StartupOption.CLUSTERID.getName() + " flag"); 1169 return null; 1170 } 1171 String clusterId = args[i]; 1172 // Make sure an id is specified and not another flag 1173 if (clusterId.isEmpty() || 1174 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) || 1175 clusterId.equalsIgnoreCase( 1176 StartupOption.NONINTERACTIVE.getName())) { 1177 LOG.fatal("Must specify a valid cluster ID after the " 1178 + StartupOption.CLUSTERID.getName() + " flag"); 1179 return null; 1180 } 1181 startOpt.setClusterId(clusterId); 1182 } 1183 1184 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) { 1185 startOpt.setForceFormat(true); 1186 } 1187 1188 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) { 1189 startOpt.setInteractiveFormat(false); 1190 } 1191 } 1192 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) { 1193 startOpt = StartupOption.GENCLUSTERID; 1194 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) { 1195 startOpt = StartupOption.REGULAR; 1196 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) { 1197 startOpt = StartupOption.BACKUP; 1198 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) { 1199 startOpt = StartupOption.CHECKPOINT; 1200 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) { 1201 startOpt = StartupOption.UPGRADE; 1202 /* Can be followed by CLUSTERID with a required parameter or 1203 * RENAMERESERVED with an optional parameter 1204 */ 1205 while (i + 1 < argsLen) { 1206 String flag = args[i + 1]; 1207 if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1208 if (i + 2 < argsLen) { 1209 i += 2; 1210 startOpt.setClusterId(args[i]); 1211 } else { 1212 LOG.fatal("Must specify a valid cluster ID after the " 1213 + StartupOption.CLUSTERID.getName() + " flag"); 1214 return null; 1215 } 1216 } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED 1217 .getName())) { 1218 if (i + 2 < argsLen) { 1219 FSImageFormat.setRenameReservedPairs(args[i + 2]); 1220 i += 2; 1221 } else { 1222 FSImageFormat.useDefaultRenameReservedPairs(); 1223 i += 1; 1224 } 1225 } else { 1226 LOG.fatal("Unknown upgrade flag " + flag); 1227 return null; 1228 } 1229 } 1230 } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) { 1231 startOpt = StartupOption.ROLLINGUPGRADE; 1232 ++i; 1233 startOpt.setRollingUpgradeStartupOption(args[i]); 1234 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) { 1235 startOpt = StartupOption.ROLLBACK; 1236 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) { 1237 startOpt = StartupOption.FINALIZE; 1238 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) { 1239 startOpt = StartupOption.IMPORT; 1240 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) { 1241 startOpt = StartupOption.BOOTSTRAPSTANDBY; 1242 return startOpt; 1243 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { 1244 startOpt = StartupOption.INITIALIZESHAREDEDITS; 1245 for (i = i + 1 ; i < argsLen; i++) { 1246 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) { 1247 startOpt.setInteractiveFormat(false); 1248 } else if (StartupOption.FORCE.getName().equals(args[i])) { 1249 startOpt.setForceFormat(true); 1250 } else { 1251 LOG.fatal("Invalid argument: " + args[i]); 1252 return null; 1253 } 1254 } 1255 return startOpt; 1256 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) { 1257 if (startOpt != StartupOption.REGULAR) { 1258 throw new RuntimeException("Can't combine -recover with " + 1259 "other startup options."); 1260 } 1261 startOpt = StartupOption.RECOVER; 1262 while (++i < argsLen) { 1263 if (args[i].equalsIgnoreCase( 1264 StartupOption.FORCE.getName())) { 1265 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE); 1266 } else { 1267 throw new RuntimeException("Error parsing recovery options: " + 1268 "can't understand option \"" + args[i] + "\""); 1269 } 1270 } 1271 } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) { 1272 startOpt = StartupOption.METADATAVERSION; 1273 } else { 1274 return null; 1275 } 1276 } 1277 return startOpt; 1278 } 1279 1280 private static void setStartupOption(Configuration conf, StartupOption opt) { 1281 conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name()); 1282 } 1283 1284 static StartupOption getStartupOption(Configuration conf) { 1285 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY, 1286 StartupOption.REGULAR.toString())); 1287 } 1288 1289 private static void doRecovery(StartupOption startOpt, Configuration conf) 1290 throws IOException { 1291 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1292 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1293 initializeGenericKeys(conf, nsId, namenodeId); 1294 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { 1295 if (!confirmPrompt("You have selected Metadata Recovery mode. " + 1296 "This mode is intended to recover lost metadata on a corrupt " + 1297 "filesystem. Metadata recovery mode often permanently deletes " + 1298 "data from your HDFS filesystem. Please back up your edit log " + 1299 "and fsimage before trying this!\n\n" + 1300 "Are you ready to proceed? (Y/N)\n")) { 1301 System.err.println("Recovery aborted at user request.\n"); 1302 return; 1303 } 1304 } 1305 MetaRecoveryContext.LOG.info("starting recovery..."); 1306 UserGroupInformation.setConfiguration(conf); 1307 NameNode.initMetrics(conf, startOpt.toNodeRole()); 1308 FSNamesystem fsn = null; 1309 try { 1310 fsn = FSNamesystem.loadFromDisk(conf); 1311 fsn.getFSImage().saveNamespace(fsn); 1312 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE"); 1313 } catch (IOException e) { 1314 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1315 throw e; 1316 } catch (RuntimeException e) { 1317 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1318 throw e; 1319 } finally { 1320 if (fsn != null) 1321 fsn.close(); 1322 } 1323 } 1324 1325 /** 1326 * Verify that configured directories exist, then print the metadata versions 1327 * of the software and the image. 1328 * 1329 * @param conf configuration to use 1330 * @throws IOException 1331 */ 1332 private static boolean printMetadataVersion(Configuration conf) 1333 throws IOException { 1334 final FSImage fsImage = new FSImage(conf); 1335 final FSNamesystem fs = new FSNamesystem(conf, fsImage, false); 1336 return fsImage.recoverTransitionRead( 1337 StartupOption.METADATAVERSION, fs, null); 1338 } 1339 1340 public static NameNode createNameNode(String argv[], Configuration conf) 1341 throws IOException { 1342 LOG.info("createNameNode " + Arrays.asList(argv)); 1343 if (conf == null) 1344 conf = new HdfsConfiguration(); 1345 StartupOption startOpt = parseArguments(argv); 1346 if (startOpt == null) { 1347 printUsage(System.err); 1348 return null; 1349 } 1350 setStartupOption(conf, startOpt); 1351 1352 switch (startOpt) { 1353 case FORMAT: { 1354 boolean aborted = format(conf, startOpt.getForceFormat(), 1355 startOpt.getInteractiveFormat()); 1356 terminate(aborted ? 1 : 0); 1357 return null; // avoid javac warning 1358 } 1359 case GENCLUSTERID: { 1360 System.err.println("Generating new cluster id:"); 1361 System.out.println(NNStorage.newClusterID()); 1362 terminate(0); 1363 return null; 1364 } 1365 case FINALIZE: { 1366 System.err.println("Use of the argument '" + StartupOption.FINALIZE + 1367 "' is no longer supported. To finalize an upgrade, start the NN " + 1368 " and then run `hdfs dfsadmin -finalizeUpgrade'"); 1369 terminate(1); 1370 return null; // avoid javac warning 1371 } 1372 case ROLLBACK: { 1373 boolean aborted = doRollback(conf, true); 1374 terminate(aborted ? 1 : 0); 1375 return null; // avoid warning 1376 } 1377 case BOOTSTRAPSTANDBY: { 1378 String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length); 1379 int rc = BootstrapStandby.run(toolArgs, conf); 1380 terminate(rc); 1381 return null; // avoid warning 1382 } 1383 case INITIALIZESHAREDEDITS: { 1384 boolean aborted = initializeSharedEdits(conf, 1385 startOpt.getForceFormat(), 1386 startOpt.getInteractiveFormat()); 1387 terminate(aborted ? 1 : 0); 1388 return null; // avoid warning 1389 } 1390 case BACKUP: 1391 case CHECKPOINT: { 1392 NamenodeRole role = startOpt.toNodeRole(); 1393 DefaultMetricsSystem.initialize(role.toString().replace(" ", "")); 1394 return new BackupNode(conf, role); 1395 } 1396 case RECOVER: { 1397 NameNode.doRecovery(startOpt, conf); 1398 return null; 1399 } 1400 case METADATAVERSION: { 1401 printMetadataVersion(conf); 1402 terminate(0); 1403 return null; // avoid javac warning 1404 } 1405 default: { 1406 DefaultMetricsSystem.initialize("NameNode"); 1407 return new NameNode(conf); 1408 } 1409 } 1410 } 1411 1412 /** 1413 * In federation configuration is set for a set of 1414 * namenode and secondary namenode/backup/checkpointer, which are 1415 * grouped under a logical nameservice ID. The configuration keys specific 1416 * to them have suffix set to configured nameserviceId. 1417 * 1418 * This method copies the value from specific key of format key.nameserviceId 1419 * to key, to set up the generic configuration. Once this is done, only 1420 * generic version of the configuration is read in rest of the code, for 1421 * backward compatibility and simpler code changes. 1422 * 1423 * @param conf 1424 * Configuration object to lookup specific key and to set the value 1425 * to the key passed. Note the conf object is modified 1426 * @param nameserviceId name service Id (to distinguish federated NNs) 1427 * @param namenodeId the namenode ID (to distinguish HA NNs) 1428 * @see DFSUtil#setGenericConf(Configuration, String, String, String...) 1429 */ 1430 public static void initializeGenericKeys(Configuration conf, 1431 String nameserviceId, String namenodeId) { 1432 if ((nameserviceId != null && !nameserviceId.isEmpty()) || 1433 (namenodeId != null && !namenodeId.isEmpty())) { 1434 if (nameserviceId != null) { 1435 conf.set(DFS_NAMESERVICE_ID, nameserviceId); 1436 } 1437 if (namenodeId != null) { 1438 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId); 1439 } 1440 1441 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId, 1442 NAMENODE_SPECIFIC_KEYS); 1443 DFSUtil.setGenericConf(conf, nameserviceId, null, 1444 NAMESERVICE_SPECIFIC_KEYS); 1445 } 1446 1447 // If the RPC address is set use it to (re-)configure the default FS 1448 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { 1449 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 1450 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); 1451 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); 1452 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString()); 1453 } 1454 } 1455 1456 /** 1457 * Get the name service Id for the node 1458 * @return name service Id or null if federation is not configured 1459 */ 1460 protected String getNameServiceId(Configuration conf) { 1461 return DFSUtil.getNamenodeNameServiceId(conf); 1462 } 1463 1464 /** 1465 */ 1466 public static void main(String argv[]) throws Exception { 1467 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) { 1468 System.exit(0); 1469 } 1470 1471 try { 1472 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG); 1473 NameNode namenode = createNameNode(argv, null); 1474 if (namenode != null) { 1475 namenode.join(); 1476 } 1477 } catch (Throwable e) { 1478 LOG.fatal("Exception in namenode join", e); 1479 terminate(1, e); 1480 } 1481 } 1482 1483 synchronized void monitorHealth() 1484 throws HealthCheckFailedException, AccessControlException { 1485 namesystem.checkSuperuserPrivilege(); 1486 if (!haEnabled) { 1487 return; // no-op, if HA is not enabled 1488 } 1489 getNamesystem().checkAvailableResources(); 1490 if (!getNamesystem().nameNodeHasResourcesAvailable()) { 1491 throw new HealthCheckFailedException( 1492 "The NameNode has no resources available"); 1493 } 1494 } 1495 1496 synchronized void transitionToActive() 1497 throws ServiceFailedException, AccessControlException { 1498 namesystem.checkSuperuserPrivilege(); 1499 if (!haEnabled) { 1500 throw new ServiceFailedException("HA for namenode is not enabled"); 1501 } 1502 state.setState(haContext, ACTIVE_STATE); 1503 } 1504 1505 synchronized void transitionToStandby() 1506 throws ServiceFailedException, AccessControlException { 1507 namesystem.checkSuperuserPrivilege(); 1508 if (!haEnabled) { 1509 throw new ServiceFailedException("HA for namenode is not enabled"); 1510 } 1511 state.setState(haContext, STANDBY_STATE); 1512 } 1513 1514 synchronized HAServiceStatus getServiceStatus() 1515 throws ServiceFailedException, AccessControlException { 1516 namesystem.checkSuperuserPrivilege(); 1517 if (!haEnabled) { 1518 throw new ServiceFailedException("HA for namenode is not enabled"); 1519 } 1520 if (state == null) { 1521 return new HAServiceStatus(HAServiceState.INITIALIZING); 1522 } 1523 HAServiceState retState = state.getServiceState(); 1524 HAServiceStatus ret = new HAServiceStatus(retState); 1525 if (retState == HAServiceState.STANDBY) { 1526 String safemodeTip = namesystem.getSafeModeTip(); 1527 if (!safemodeTip.isEmpty()) { 1528 ret.setNotReadyToBecomeActive( 1529 "The NameNode is in safemode. " + 1530 safemodeTip); 1531 } else { 1532 ret.setReadyToBecomeActive(); 1533 } 1534 } else if (retState == HAServiceState.ACTIVE) { 1535 ret.setReadyToBecomeActive(); 1536 } else { 1537 ret.setNotReadyToBecomeActive("State is " + state); 1538 } 1539 return ret; 1540 } 1541 1542 synchronized HAServiceState getServiceState() { 1543 if (state == null) { 1544 return HAServiceState.INITIALIZING; 1545 } 1546 return state.getServiceState(); 1547 } 1548 1549 /** 1550 * Register NameNodeStatusMXBean 1551 */ 1552 private void registerNNSMXBean() { 1553 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this); 1554 } 1555 1556 @Override // NameNodeStatusMXBean 1557 public String getNNRole() { 1558 String roleStr = ""; 1559 NamenodeRole role = getRole(); 1560 if (null != role) { 1561 roleStr = role.toString(); 1562 } 1563 return roleStr; 1564 } 1565 1566 @Override // NameNodeStatusMXBean 1567 public String getState() { 1568 String servStateStr = ""; 1569 HAServiceState servState = getServiceState(); 1570 if (null != servState) { 1571 servStateStr = servState.toString(); 1572 } 1573 return servStateStr; 1574 } 1575 1576 @Override // NameNodeStatusMXBean 1577 public String getHostAndPort() { 1578 return getNameNodeAddressHostPortString(); 1579 } 1580 1581 @Override // NameNodeStatusMXBean 1582 public boolean isSecurityEnabled() { 1583 return UserGroupInformation.isSecurityEnabled(); 1584 } 1585 1586 /** 1587 * Shutdown the NN immediately in an ungraceful way. Used when it would be 1588 * unsafe for the NN to continue operating, e.g. during a failed HA state 1589 * transition. 1590 * 1591 * @param t exception which warrants the shutdown. Printed to the NN log 1592 * before exit. 1593 * @throws ExitException thrown only for testing. 1594 */ 1595 protected synchronized void doImmediateShutdown(Throwable t) 1596 throws ExitException { 1597 String message = "Error encountered requiring NN shutdown. " + 1598 "Shutting down immediately."; 1599 try { 1600 LOG.fatal(message, t); 1601 } catch (Throwable ignored) { 1602 // This is unlikely to happen, but there's nothing we can do if it does. 1603 } 1604 terminate(1, t); 1605 } 1606 1607 /** 1608 * Class used to expose {@link NameNode} as context to {@link HAState} 1609 */ 1610 protected class NameNodeHAContext implements HAContext { 1611 @Override 1612 public void setState(HAState s) { 1613 state = s; 1614 } 1615 1616 @Override 1617 public HAState getState() { 1618 return state; 1619 } 1620 1621 @Override 1622 public void startActiveServices() throws IOException { 1623 try { 1624 namesystem.startActiveServices(); 1625 startTrashEmptier(conf); 1626 } catch (Throwable t) { 1627 doImmediateShutdown(t); 1628 } 1629 } 1630 1631 @Override 1632 public void stopActiveServices() throws IOException { 1633 try { 1634 if (namesystem != null) { 1635 namesystem.stopActiveServices(); 1636 } 1637 stopTrashEmptier(); 1638 } catch (Throwable t) { 1639 doImmediateShutdown(t); 1640 } 1641 } 1642 1643 @Override 1644 public void startStandbyServices() throws IOException { 1645 try { 1646 namesystem.startStandbyServices(conf); 1647 } catch (Throwable t) { 1648 doImmediateShutdown(t); 1649 } 1650 } 1651 1652 @Override 1653 public void prepareToStopStandbyServices() throws ServiceFailedException { 1654 try { 1655 namesystem.prepareToStopStandbyServices(); 1656 } catch (Throwable t) { 1657 doImmediateShutdown(t); 1658 } 1659 } 1660 1661 @Override 1662 public void stopStandbyServices() throws IOException { 1663 try { 1664 if (namesystem != null) { 1665 namesystem.stopStandbyServices(); 1666 } 1667 } catch (Throwable t) { 1668 doImmediateShutdown(t); 1669 } 1670 } 1671 1672 @Override 1673 public void writeLock() { 1674 namesystem.writeLock(); 1675 namesystem.lockRetryCache(); 1676 } 1677 1678 @Override 1679 public void writeUnlock() { 1680 namesystem.unlockRetryCache(); 1681 namesystem.writeUnlock(); 1682 } 1683 1684 /** Check if an operation of given category is allowed */ 1685 @Override 1686 public void checkOperation(final OperationCategory op) 1687 throws StandbyException { 1688 state.checkOperation(haContext, op); 1689 } 1690 1691 @Override 1692 public boolean allowStaleReads() { 1693 return allowStaleStandbyReads; 1694 } 1695 1696 } 1697 1698 public boolean isStandbyState() { 1699 return (state.equals(STANDBY_STATE)); 1700 } 1701 1702 public boolean isActiveState() { 1703 return (state.equals(ACTIVE_STATE)); 1704 } 1705 1706 /** 1707 * Check that a request to change this node's HA state is valid. 1708 * In particular, verifies that, if auto failover is enabled, non-forced 1709 * requests from the HAAdmin CLI are rejected, and vice versa. 1710 * 1711 * @param req the request to check 1712 * @throws AccessControlException if the request is disallowed 1713 */ 1714 void checkHaStateChange(StateChangeRequestInfo req) 1715 throws AccessControlException { 1716 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY, 1717 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT); 1718 switch (req.getSource()) { 1719 case REQUEST_BY_USER: 1720 if (autoHaEnabled) { 1721 throw new AccessControlException( 1722 "Manual HA control for this NameNode is disallowed, because " + 1723 "automatic HA is enabled."); 1724 } 1725 break; 1726 case REQUEST_BY_USER_FORCED: 1727 if (autoHaEnabled) { 1728 LOG.warn("Allowing manual HA control from " + 1729 Server.getRemoteAddress() + 1730 " even though automatic HA is enabled, because the user " + 1731 "specified the force flag"); 1732 } 1733 break; 1734 case REQUEST_BY_ZKFC: 1735 if (!autoHaEnabled) { 1736 throw new AccessControlException( 1737 "Request from ZK failover controller at " + 1738 Server.getRemoteAddress() + " denied since automatic HA " + 1739 "is not enabled"); 1740 } 1741 break; 1742 } 1743 } 1744}