001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.hdfs.server.namenode; 019 020 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 021 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 022 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 023 024 import java.io.File; 025 import java.io.IOException; 026 import java.io.PrintStream; 027 import java.net.InetSocketAddress; 028 import java.net.URI; 029 import java.security.PrivilegedExceptionAction; 030 import java.util.ArrayList; 031 import java.util.Arrays; 032 import java.util.Collection; 033 import java.util.Iterator; 034 import java.util.List; 035 036 import javax.management.ObjectName; 037 038 import org.apache.commons.logging.Log; 039 import org.apache.commons.logging.LogFactory; 040 import org.apache.hadoop.HadoopIllegalArgumentException; 041 import org.apache.hadoop.classification.InterfaceAudience; 042 import org.apache.hadoop.conf.Configuration; 043 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 044 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; 045 import org.apache.hadoop.ha.HAServiceStatus; 046 import org.apache.hadoop.ha.HealthCheckFailedException; 047 import org.apache.hadoop.ha.ServiceFailedException; 048 import org.apache.hadoop.fs.FileSystem; 049 import org.apache.hadoop.fs.FileUtil; 050 import org.apache.hadoop.fs.Trash; 051 052 import static org.apache.hadoop.hdfs.DFSConfigKeys.*; 053 import static org.apache.hadoop.util.ExitUtil.terminate; 054 import static org.apache.hadoop.util.ToolRunner.confirmPrompt; 055 056 import org.apache.hadoop.hdfs.DFSConfigKeys; 057 import org.apache.hadoop.hdfs.DFSUtil; 058 import org.apache.hadoop.hdfs.HAUtil; 059 import org.apache.hadoop.hdfs.HdfsConfiguration; 060 import org.apache.hadoop.hdfs.protocol.ClientProtocol; 061 import org.apache.hadoop.hdfs.protocol.HdfsConstants; 062 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 063 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 064 import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; 065 import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby; 066 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 067 import org.apache.hadoop.hdfs.server.namenode.ha.HAState; 068 import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; 069 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 070 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 071 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; 072 import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; 073 import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; 074 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; 075 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; 076 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 077 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 078 import org.apache.hadoop.ipc.Server; 079 import org.apache.hadoop.ipc.StandbyException; 080 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 081 import org.apache.hadoop.metrics2.util.MBeans; 082 import org.apache.hadoop.net.NetUtils; 083 import org.apache.hadoop.security.AccessControlException; 084 import org.apache.hadoop.security.RefreshUserMappingsProtocol; 085 import org.apache.hadoop.security.SecurityUtil; 086 import org.apache.hadoop.security.UserGroupInformation; 087 import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; 088 import org.apache.hadoop.tools.GetUserMappingsProtocol; 089 import org.apache.hadoop.util.ExitUtil.ExitException; 090 import org.apache.hadoop.util.JvmPauseMonitor; 091 import org.apache.hadoop.util.ServicePlugin; 092 import org.apache.hadoop.util.StringUtils; 093 094 import com.google.common.annotations.VisibleForTesting; 095 import com.google.common.base.Joiner; 096 import com.google.common.base.Preconditions; 097 import com.google.common.collect.Lists; 098 099 /********************************************************** 100 * NameNode serves as both directory namespace manager and 101 * "inode table" for the Hadoop DFS. There is a single NameNode 102 * running in any DFS deployment. (Well, except when there 103 * is a second backup/failover NameNode, or when using federated NameNodes.) 104 * 105 * The NameNode controls two critical tables: 106 * 1) filename->blocksequence (namespace) 107 * 2) block->machinelist ("inodes") 108 * 109 * The first table is stored on disk and is very precious. 110 * The second table is rebuilt every time the NameNode comes up. 111 * 112 * 'NameNode' refers to both this class as well as the 'NameNode server'. 113 * The 'FSNamesystem' class actually performs most of the filesystem 114 * management. The majority of the 'NameNode' class itself is concerned 115 * with exposing the IPC interface and the HTTP server to the outside world, 116 * plus some configuration management. 117 * 118 * NameNode implements the 119 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which 120 * allows clients to ask for DFS services. 121 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for 122 * direct use by authors of DFS client code. End-users should instead use the 123 * {@link org.apache.hadoop.fs.FileSystem} class. 124 * 125 * NameNode also implements the 126 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface, 127 * used by DataNodes that actually store DFS data blocks. These 128 * methods are invoked repeatedly and automatically by all the 129 * DataNodes in a DFS deployment. 130 * 131 * NameNode also implements the 132 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface, 133 * used by secondary namenodes or rebalancing processes to get partial 134 * NameNode state, for example partial blocksMap etc. 135 **********************************************************/ 136 @InterfaceAudience.Private 137 public class NameNode implements NameNodeStatusMXBean { 138 static{ 139 HdfsConfiguration.init(); 140 } 141 142 /** 143 * Categories of operations supported by the namenode. 144 */ 145 public static enum OperationCategory { 146 /** Operations that are state agnostic */ 147 UNCHECKED, 148 /** Read operation that does not change the namespace state */ 149 READ, 150 /** Write operation that changes the namespace state */ 151 WRITE, 152 /** Operations related to checkpointing */ 153 CHECKPOINT, 154 /** Operations related to {@link JournalProtocol} */ 155 JOURNAL 156 } 157 158 /** 159 * HDFS configuration can have three types of parameters: 160 * <ol> 161 * <li>Parameters that are common for all the name services in the cluster.</li> 162 * <li>Parameters that are specific to a name service. These keys are suffixed 163 * with nameserviceId in the configuration. For example, 164 * "dfs.namenode.rpc-address.nameservice1".</li> 165 * <li>Parameters that are specific to a single name node. These keys are suffixed 166 * with nameserviceId and namenodeId in the configuration. for example, 167 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li> 168 * </ol> 169 * 170 * In the latter cases, operators may specify the configuration without 171 * any suffix, with a nameservice suffix, or with a nameservice and namenode 172 * suffix. The more specific suffix will take precedence. 173 * 174 * These keys are specific to a given namenode, and thus may be configured 175 * globally, for a nameservice, or for a specific namenode within a nameservice. 176 */ 177 public static final String[] NAMENODE_SPECIFIC_KEYS = { 178 DFS_NAMENODE_RPC_ADDRESS_KEY, 179 DFS_NAMENODE_RPC_BIND_HOST_KEY, 180 DFS_NAMENODE_NAME_DIR_KEY, 181 DFS_NAMENODE_EDITS_DIR_KEY, 182 DFS_NAMENODE_SHARED_EDITS_DIR_KEY, 183 DFS_NAMENODE_CHECKPOINT_DIR_KEY, 184 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY, 185 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, 186 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, 187 DFS_NAMENODE_HTTP_ADDRESS_KEY, 188 DFS_NAMENODE_KEYTAB_FILE_KEY, 189 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, 190 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, 191 DFS_NAMENODE_BACKUP_ADDRESS_KEY, 192 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, 193 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY, 194 DFS_NAMENODE_USER_NAME_KEY, 195 DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY, 196 DFS_HA_FENCE_METHODS_KEY, 197 DFS_HA_ZKFC_PORT_KEY, 198 DFS_HA_FENCE_METHODS_KEY 199 }; 200 201 /** 202 * @see #NAMENODE_SPECIFIC_KEYS 203 * These keys are specific to a nameservice, but may not be overridden 204 * for a specific namenode. 205 */ 206 public static final String[] NAMESERVICE_SPECIFIC_KEYS = { 207 DFS_HA_AUTO_FAILOVER_ENABLED_KEY 208 }; 209 210 private static final String USAGE = "Usage: java NameNode [" 211 + StartupOption.BACKUP.getName() + "] | [" 212 + StartupOption.CHECKPOINT.getName() + "] | [" 213 + StartupOption.FORMAT.getName() + " [" 214 + StartupOption.CLUSTERID.getName() + " cid ] [" 215 + StartupOption.FORCE.getName() + "] [" 216 + StartupOption.NONINTERACTIVE.getName() + "] ] | [" 217 + StartupOption.UPGRADE.getName() + "] | [" 218 + StartupOption.ROLLBACK.getName() + "] | [" 219 + StartupOption.FINALIZE.getName() + "] | [" 220 + StartupOption.IMPORT.getName() + "] | [" 221 + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | [" 222 + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" 223 + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName() 224 + " ] ]"; 225 226 public long getProtocolVersion(String protocol, 227 long clientVersion) throws IOException { 228 if (protocol.equals(ClientProtocol.class.getName())) { 229 return ClientProtocol.versionID; 230 } else if (protocol.equals(DatanodeProtocol.class.getName())){ 231 return DatanodeProtocol.versionID; 232 } else if (protocol.equals(NamenodeProtocol.class.getName())){ 233 return NamenodeProtocol.versionID; 234 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){ 235 return RefreshAuthorizationPolicyProtocol.versionID; 236 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){ 237 return RefreshUserMappingsProtocol.versionID; 238 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ 239 return GetUserMappingsProtocol.versionID; 240 } else { 241 throw new IOException("Unknown protocol to name node: " + protocol); 242 } 243 } 244 245 public static final int DEFAULT_PORT = 8020; 246 public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); 247 public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"); 248 public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange"); 249 public static final HAState ACTIVE_STATE = new ActiveState(); 250 public static final HAState STANDBY_STATE = new StandbyState(); 251 252 protected FSNamesystem namesystem; 253 protected final Configuration conf; 254 protected NamenodeRole role; 255 private volatile HAState state; 256 private final boolean haEnabled; 257 private final HAContext haContext; 258 protected boolean allowStaleStandbyReads; 259 260 261 /** httpServer */ 262 protected NameNodeHttpServer httpServer; 263 private Thread emptier; 264 /** only used for testing purposes */ 265 protected boolean stopRequested = false; 266 /** Registration information of this name-node */ 267 protected NamenodeRegistration nodeRegistration; 268 /** Activated plug-ins. */ 269 private List<ServicePlugin> plugins; 270 271 private NameNodeRpcServer rpcServer; 272 273 private JvmPauseMonitor pauseMonitor; 274 private ObjectName nameNodeStatusBeanName; 275 276 /** Format a new filesystem. Destroys any filesystem that may already 277 * exist at this location. **/ 278 public static void format(Configuration conf) throws IOException { 279 format(conf, true, true); 280 } 281 282 static NameNodeMetrics metrics; 283 private static final StartupProgress startupProgress = new StartupProgress(); 284 /** Return the {@link FSNamesystem} object. 285 * @return {@link FSNamesystem} object. 286 */ 287 public FSNamesystem getNamesystem() { 288 return namesystem; 289 } 290 291 public NamenodeProtocols getRpcServer() { 292 return rpcServer; 293 } 294 295 static void initMetrics(Configuration conf, NamenodeRole role) { 296 metrics = NameNodeMetrics.create(conf, role); 297 } 298 299 public static NameNodeMetrics getNameNodeMetrics() { 300 return metrics; 301 } 302 303 /** 304 * Returns object used for reporting namenode startup progress. 305 * 306 * @return StartupProgress for reporting namenode startup progress 307 */ 308 public static StartupProgress getStartupProgress() { 309 return startupProgress; 310 } 311 312 public static InetSocketAddress getAddress(String address) { 313 return NetUtils.createSocketAddr(address, DEFAULT_PORT); 314 } 315 316 /** 317 * Set the configuration property for the service rpc address 318 * to address 319 */ 320 public static void setServiceAddress(Configuration conf, 321 String address) { 322 LOG.info("Setting ADDRESS " + address); 323 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address); 324 } 325 326 /** 327 * Fetches the address for services to use when connecting to namenode 328 * based on the value of fallback returns null if the special 329 * address is not specified or returns the default namenode address 330 * to be used by both clients and services. 331 * Services here are datanodes, backup node, any non client connection 332 */ 333 public static InetSocketAddress getServiceAddress(Configuration conf, 334 boolean fallback) { 335 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); 336 if (addr == null || addr.isEmpty()) { 337 return fallback ? getAddress(conf) : null; 338 } 339 return getAddress(addr); 340 } 341 342 public static InetSocketAddress getAddress(Configuration conf) { 343 URI filesystemURI = FileSystem.getDefaultUri(conf); 344 return getAddress(filesystemURI); 345 } 346 347 348 /** 349 * TODO:FEDERATION 350 * @param filesystemURI 351 * @return address of file system 352 */ 353 public static InetSocketAddress getAddress(URI filesystemURI) { 354 String authority = filesystemURI.getAuthority(); 355 if (authority == null) { 356 throw new IllegalArgumentException(String.format( 357 "Invalid URI for NameNode address (check %s): %s has no authority.", 358 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString())); 359 } 360 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase( 361 filesystemURI.getScheme())) { 362 throw new IllegalArgumentException(String.format( 363 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.", 364 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(), 365 HdfsConstants.HDFS_URI_SCHEME)); 366 } 367 return getAddress(authority); 368 } 369 370 public static URI getUri(InetSocketAddress namenode) { 371 int port = namenode.getPort(); 372 String portString = port == DEFAULT_PORT ? "" : (":"+port); 373 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 374 + namenode.getHostName()+portString); 375 } 376 377 // 378 // Common NameNode methods implementation for the active name-node role. 379 // 380 public NamenodeRole getRole() { 381 return role; 382 } 383 384 boolean isRole(NamenodeRole that) { 385 return role.equals(that); 386 } 387 388 /** 389 * Given a configuration get the address of the service rpc server 390 * If the service rpc is not configured returns null 391 */ 392 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { 393 return NameNode.getServiceAddress(conf, false); 394 } 395 396 protected InetSocketAddress getRpcServerAddress(Configuration conf) { 397 return getAddress(conf); 398 } 399 400 /** Given a configuration get the bind host of the service rpc server 401 * If the bind host is not configured returns null. 402 */ 403 protected String getServiceRpcServerBindHost(Configuration conf) { 404 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY); 405 if (addr == null || addr.isEmpty()) { 406 return null; 407 } 408 return addr; 409 } 410 411 /** Given a configuration get the bind host of the client rpc server 412 * If the bind host is not configured returns null. 413 */ 414 protected String getRpcServerBindHost(Configuration conf) { 415 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY); 416 if (addr == null || addr.isEmpty()) { 417 return null; 418 } 419 return addr; 420 } 421 422 /** 423 * Modifies the configuration passed to contain the service rpc address setting 424 */ 425 protected void setRpcServiceServerAddress(Configuration conf, 426 InetSocketAddress serviceRPCAddress) { 427 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress)); 428 } 429 430 protected void setRpcServerAddress(Configuration conf, 431 InetSocketAddress rpcAddress) { 432 FileSystem.setDefaultUri(conf, getUri(rpcAddress)); 433 } 434 435 protected InetSocketAddress getHttpServerAddress(Configuration conf) { 436 return getHttpAddress(conf); 437 } 438 439 /** @return the NameNode HTTP address. */ 440 public static InetSocketAddress getHttpAddress(Configuration conf) { 441 return NetUtils.createSocketAddr( 442 conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT)); 443 } 444 445 protected void loadNamesystem(Configuration conf) throws IOException { 446 this.namesystem = FSNamesystem.loadFromDisk(conf); 447 } 448 449 NamenodeRegistration getRegistration() { 450 return nodeRegistration; 451 } 452 453 NamenodeRegistration setRegistration() { 454 nodeRegistration = new NamenodeRegistration( 455 NetUtils.getHostPortString(rpcServer.getRpcAddress()), 456 NetUtils.getHostPortString(getHttpAddress()), 457 getFSImage().getStorage(), getRole()); 458 return nodeRegistration; 459 } 460 461 /* optimize ugi lookup for RPC operations to avoid a trip through 462 * UGI.getCurrentUser which is synch'ed 463 */ 464 public static UserGroupInformation getRemoteUser() throws IOException { 465 UserGroupInformation ugi = Server.getRemoteUser(); 466 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser(); 467 } 468 469 470 /** 471 * Login as the configured user for the NameNode. 472 */ 473 void loginAsNameNodeUser(Configuration conf) throws IOException { 474 InetSocketAddress socAddr = getRpcServerAddress(conf); 475 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 476 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName()); 477 } 478 479 /** 480 * Initialize name-node. 481 * 482 * @param conf the configuration 483 */ 484 protected void initialize(Configuration conf) throws IOException { 485 if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) { 486 String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY); 487 if (intervals != null) { 488 conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS, 489 intervals); 490 } 491 } 492 493 UserGroupInformation.setConfiguration(conf); 494 loginAsNameNodeUser(conf); 495 496 NameNode.initMetrics(conf, this.getRole()); 497 StartupProgressMetrics.register(startupProgress); 498 499 if (NamenodeRole.NAMENODE == role) { 500 startHttpServer(conf); 501 } 502 loadNamesystem(conf); 503 504 rpcServer = createRpcServer(conf); 505 if (NamenodeRole.NAMENODE == role) { 506 httpServer.setNameNodeAddress(getNameNodeAddress()); 507 httpServer.setFSImage(getFSImage()); 508 } 509 510 pauseMonitor = new JvmPauseMonitor(conf); 511 pauseMonitor.start(); 512 513 startCommonServices(conf); 514 } 515 516 /** 517 * Create the RPC server implementation. Used as an extension point for the 518 * BackupNode. 519 */ 520 protected NameNodeRpcServer createRpcServer(Configuration conf) 521 throws IOException { 522 return new NameNodeRpcServer(conf, this); 523 } 524 525 /** Start the services common to active and standby states */ 526 private void startCommonServices(Configuration conf) throws IOException { 527 namesystem.startCommonServices(conf, haContext); 528 registerNNSMXBean(); 529 if (NamenodeRole.NAMENODE != role) { 530 startHttpServer(conf); 531 httpServer.setNameNodeAddress(getNameNodeAddress()); 532 httpServer.setFSImage(getFSImage()); 533 } 534 rpcServer.start(); 535 plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, 536 ServicePlugin.class); 537 for (ServicePlugin p: plugins) { 538 try { 539 p.start(this); 540 } catch (Throwable t) { 541 LOG.warn("ServicePlugin " + p + " could not be started", t); 542 } 543 } 544 LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress()); 545 if (rpcServer.getServiceRpcAddress() != null) { 546 LOG.info(getRole() + " service RPC up at: " 547 + rpcServer.getServiceRpcAddress()); 548 } 549 } 550 551 private void stopCommonServices() { 552 if(rpcServer != null) rpcServer.stop(); 553 if(namesystem != null) namesystem.close(); 554 if (pauseMonitor != null) pauseMonitor.stop(); 555 if (plugins != null) { 556 for (ServicePlugin p : plugins) { 557 try { 558 p.stop(); 559 } catch (Throwable t) { 560 LOG.warn("ServicePlugin " + p + " could not be stopped", t); 561 } 562 } 563 } 564 stopHttpServer(); 565 } 566 567 private void startTrashEmptier(final Configuration conf) throws IOException { 568 long trashInterval = 569 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT); 570 if (trashInterval == 0) { 571 return; 572 } else if (trashInterval < 0) { 573 throw new IOException("Cannot start trash emptier with negative interval." 574 + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value."); 575 } 576 577 // This may be called from the transitionToActive code path, in which 578 // case the current user is the administrator, not the NN. The trash 579 // emptier needs to run as the NN. See HDFS-3972. 580 FileSystem fs = SecurityUtil.doAsLoginUser( 581 new PrivilegedExceptionAction<FileSystem>() { 582 @Override 583 public FileSystem run() throws IOException { 584 return FileSystem.get(conf); 585 } 586 }); 587 this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier"); 588 this.emptier.setDaemon(true); 589 this.emptier.start(); 590 } 591 592 private void stopTrashEmptier() { 593 if (this.emptier != null) { 594 emptier.interrupt(); 595 emptier = null; 596 } 597 } 598 599 private void startHttpServer(final Configuration conf) throws IOException { 600 httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf)); 601 httpServer.start(); 602 httpServer.setStartupProgress(startupProgress); 603 } 604 605 private void stopHttpServer() { 606 try { 607 if (httpServer != null) httpServer.stop(); 608 } catch (Exception e) { 609 LOG.error("Exception while stopping httpserver", e); 610 } 611 } 612 613 /** 614 * Start NameNode. 615 * <p> 616 * The name-node can be started with one of the following startup options: 617 * <ul> 618 * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li> 619 * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li> 620 * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li> 621 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li> 622 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster 623 * upgrade and create a snapshot of the current file system state</li> 624 * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node 625 * metadata</li> 626 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the 627 * cluster back to the previous state</li> 628 * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 629 * previous upgrade</li> 630 * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li> 631 * </ul> 632 * The option is passed via configuration field: 633 * <tt>dfs.namenode.startup</tt> 634 * 635 * The conf will be modified to reflect the actual ports on which 636 * the NameNode is up and running if the user passes the port as 637 * <code>zero</code> in the conf. 638 * 639 * @param conf confirguration 640 * @throws IOException 641 */ 642 public NameNode(Configuration conf) throws IOException { 643 this(conf, NamenodeRole.NAMENODE); 644 } 645 646 protected NameNode(Configuration conf, NamenodeRole role) 647 throws IOException { 648 this.conf = conf; 649 this.role = role; 650 String nsId = getNameServiceId(conf); 651 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 652 this.haEnabled = HAUtil.isHAEnabled(conf, nsId); 653 state = createHAState(); 654 this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); 655 this.haContext = createHAContext(); 656 try { 657 initializeGenericKeys(conf, nsId, namenodeId); 658 initialize(conf); 659 try { 660 haContext.writeLock(); 661 state.prepareToEnterState(haContext); 662 state.enterState(haContext); 663 } finally { 664 haContext.writeUnlock(); 665 } 666 } catch (IOException e) { 667 this.stop(); 668 throw e; 669 } catch (HadoopIllegalArgumentException e) { 670 this.stop(); 671 throw e; 672 } 673 } 674 675 protected HAState createHAState() { 676 return !haEnabled ? ACTIVE_STATE : STANDBY_STATE; 677 } 678 679 protected HAContext createHAContext() { 680 return new NameNodeHAContext(); 681 } 682 683 /** 684 * Wait for service to finish. 685 * (Normally, it runs forever.) 686 */ 687 public void join() { 688 try { 689 rpcServer.join(); 690 } catch (InterruptedException ie) { 691 LOG.info("Caught interrupted exception ", ie); 692 } 693 } 694 695 /** 696 * Stop all NameNode threads and wait for all to finish. 697 */ 698 public void stop() { 699 synchronized(this) { 700 if (stopRequested) 701 return; 702 stopRequested = true; 703 } 704 try { 705 if (state != null) { 706 state.exitState(haContext); 707 } 708 } catch (ServiceFailedException e) { 709 LOG.warn("Encountered exception while exiting state ", e); 710 } finally { 711 stopCommonServices(); 712 if (metrics != null) { 713 metrics.shutdown(); 714 } 715 if (namesystem != null) { 716 namesystem.shutdown(); 717 } 718 if (nameNodeStatusBeanName != null) { 719 MBeans.unregister(nameNodeStatusBeanName); 720 nameNodeStatusBeanName = null; 721 } 722 } 723 } 724 725 synchronized boolean isStopRequested() { 726 return stopRequested; 727 } 728 729 /** 730 * Is the cluster currently in safe mode? 731 */ 732 public boolean isInSafeMode() { 733 return namesystem.isInSafeMode(); 734 } 735 736 /** get FSImage */ 737 @VisibleForTesting 738 public FSImage getFSImage() { 739 return namesystem.dir.fsImage; 740 } 741 742 /** 743 * @return NameNode RPC address 744 */ 745 public InetSocketAddress getNameNodeAddress() { 746 return rpcServer.getRpcAddress(); 747 } 748 749 /** 750 * @return NameNode RPC address in "host:port" string form 751 */ 752 public String getNameNodeAddressHostPortString() { 753 return NetUtils.getHostPortString(rpcServer.getRpcAddress()); 754 } 755 756 /** 757 * @return NameNode service RPC address if configured, the 758 * NameNode RPC address otherwise 759 */ 760 public InetSocketAddress getServiceRpcAddress() { 761 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress(); 762 return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr; 763 } 764 765 /** 766 * @return NameNode HTTP address, used by the Web UI, image transfer, 767 * and HTTP-based file system clients like Hftp and WebHDFS 768 */ 769 public InetSocketAddress getHttpAddress() { 770 return httpServer.getHttpAddress(); 771 } 772 773 /** 774 * @return NameNode HTTPS address, used by the Web UI, image transfer, 775 * and HTTP-based file system clients like Hftp and WebHDFS 776 */ 777 public InetSocketAddress getHttpsAddress() { 778 return httpServer.getHttpsAddress(); 779 } 780 781 /** 782 * Verify that configured directories exist, then 783 * Interactively confirm that formatting is desired 784 * for each existing directory and format them. 785 * 786 * @param conf 787 * @param force 788 * @return true if formatting was aborted, false otherwise 789 * @throws IOException 790 */ 791 private static boolean format(Configuration conf, boolean force, 792 boolean isInteractive) throws IOException { 793 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 794 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 795 initializeGenericKeys(conf, nsId, namenodeId); 796 checkAllowFormat(conf); 797 798 if (UserGroupInformation.isSecurityEnabled()) { 799 InetSocketAddress socAddr = getAddress(conf); 800 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 801 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName()); 802 } 803 804 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf); 805 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf); 806 List<URI> dirsToPrompt = new ArrayList<URI>(); 807 dirsToPrompt.addAll(nameDirsToFormat); 808 dirsToPrompt.addAll(sharedDirs); 809 List<URI> editDirsToFormat = 810 FSNamesystem.getNamespaceEditsDirs(conf); 811 812 // if clusterID is not provided - see if you can find the current one 813 String clusterId = StartupOption.FORMAT.getClusterId(); 814 if(clusterId == null || clusterId.equals("")) { 815 //Generate a new cluster id 816 clusterId = NNStorage.newClusterID(); 817 } 818 System.out.println("Formatting using clusterid: " + clusterId); 819 820 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat); 821 try { 822 FSNamesystem fsn = new FSNamesystem(conf, fsImage); 823 fsImage.getEditLog().initJournalsForWrite(); 824 825 if (!fsImage.confirmFormat(force, isInteractive)) { 826 return true; // aborted 827 } 828 829 fsImage.format(fsn, clusterId); 830 } catch (IOException ioe) { 831 LOG.warn("Encountered exception during format: ", ioe); 832 fsImage.close(); 833 throw ioe; 834 } 835 return false; 836 } 837 838 public static void checkAllowFormat(Configuration conf) throws IOException { 839 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 840 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) { 841 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY 842 + " is set to false for this filesystem, so it " 843 + "cannot be formatted. You will need to set " 844 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter " 845 + "to true in order to format this filesystem"); 846 } 847 } 848 849 @VisibleForTesting 850 public static boolean initializeSharedEdits(Configuration conf) throws IOException { 851 return initializeSharedEdits(conf, true); 852 } 853 854 @VisibleForTesting 855 public static boolean initializeSharedEdits(Configuration conf, 856 boolean force) throws IOException { 857 return initializeSharedEdits(conf, force, false); 858 } 859 860 /** 861 * Clone the supplied configuration but remove the shared edits dirs. 862 * 863 * @param conf Supplies the original configuration. 864 * @return Cloned configuration without the shared edit dirs. 865 * @throws IOException on failure to generate the configuration. 866 */ 867 private static Configuration getConfigurationWithoutSharedEdits( 868 Configuration conf) 869 throws IOException { 870 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false); 871 String editsDirsString = Joiner.on(",").join(editsDirs); 872 873 Configuration confWithoutShared = new Configuration(conf); 874 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 875 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, 876 editsDirsString); 877 return confWithoutShared; 878 } 879 880 /** 881 * Format a new shared edits dir and copy in enough edit log segments so that 882 * the standby NN can start up. 883 * 884 * @param conf configuration 885 * @param force format regardless of whether or not the shared edits dir exists 886 * @param interactive prompt the user when a dir exists 887 * @return true if the command aborts, false otherwise 888 */ 889 private static boolean initializeSharedEdits(Configuration conf, 890 boolean force, boolean interactive) throws IOException { 891 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 892 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 893 initializeGenericKeys(conf, nsId, namenodeId); 894 895 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) { 896 LOG.fatal("No shared edits directory configured for namespace " + 897 nsId + " namenode " + namenodeId); 898 return false; 899 } 900 901 if (UserGroupInformation.isSecurityEnabled()) { 902 InetSocketAddress socAddr = getAddress(conf); 903 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 904 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName()); 905 } 906 907 NNStorage existingStorage = null; 908 FSImage sharedEditsImage = null; 909 try { 910 FSNamesystem fsns = 911 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf)); 912 913 existingStorage = fsns.getFSImage().getStorage(); 914 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo(); 915 916 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); 917 918 sharedEditsImage = new FSImage(conf, 919 Lists.<URI>newArrayList(), 920 sharedEditsDirs); 921 sharedEditsImage.getEditLog().initJournalsForWrite(); 922 923 if (!sharedEditsImage.confirmFormat(force, interactive)) { 924 return true; // abort 925 } 926 927 NNStorage newSharedStorage = sharedEditsImage.getStorage(); 928 // Call Storage.format instead of FSImage.format here, since we don't 929 // actually want to save a checkpoint - just prime the dirs with 930 // the existing namespace info 931 newSharedStorage.format(nsInfo); 932 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo); 933 934 // Need to make sure the edit log segments are in good shape to initialize 935 // the shared edits dir. 936 fsns.getFSImage().getEditLog().close(); 937 fsns.getFSImage().getEditLog().initJournalsForWrite(); 938 fsns.getFSImage().getEditLog().recoverUnclosedStreams(); 939 940 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage, 941 conf); 942 } catch (IOException ioe) { 943 LOG.error("Could not initialize shared edits dir", ioe); 944 return true; // aborted 945 } finally { 946 if (sharedEditsImage != null) { 947 try { 948 sharedEditsImage.close(); 949 } catch (IOException ioe) { 950 LOG.warn("Could not close sharedEditsImage", ioe); 951 } 952 } 953 // Have to unlock storage explicitly for the case when we're running in a 954 // unit test, which runs in the same JVM as NNs. 955 if (existingStorage != null) { 956 try { 957 existingStorage.unlockAll(); 958 } catch (IOException ioe) { 959 LOG.warn("Could not unlock storage directories", ioe); 960 return true; // aborted 961 } 962 } 963 } 964 return false; // did not abort 965 } 966 967 private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns, 968 Collection<URI> sharedEditsDirs, NNStorage newSharedStorage, 969 Configuration conf) throws IOException { 970 Preconditions.checkArgument(!sharedEditsDirs.isEmpty(), 971 "No shared edits specified"); 972 // Copy edit log segments into the new shared edits dir. 973 List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs); 974 FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage, 975 sharedEditsUris); 976 newSharedEditLog.initJournalsForWrite(); 977 newSharedEditLog.recoverUnclosedStreams(); 978 979 FSEditLog sourceEditLog = fsns.getFSImage().editLog; 980 981 long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId(); 982 983 Collection<EditLogInputStream> streams = null; 984 try { 985 streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0); 986 987 // Set the nextTxid to the CheckpointTxId+1 988 newSharedEditLog.setNextTxId(fromTxId + 1); 989 990 // Copy all edits after last CheckpointTxId to shared edits dir 991 for (EditLogInputStream stream : streams) { 992 LOG.debug("Beginning to copy stream " + stream + " to shared edits"); 993 FSEditLogOp op; 994 boolean segmentOpen = false; 995 while ((op = stream.readOp()) != null) { 996 if (LOG.isTraceEnabled()) { 997 LOG.trace("copying op: " + op); 998 } 999 if (!segmentOpen) { 1000 newSharedEditLog.startLogSegment(op.txid, false); 1001 segmentOpen = true; 1002 } 1003 1004 newSharedEditLog.logEdit(op); 1005 1006 if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) { 1007 newSharedEditLog.logSync(); 1008 newSharedEditLog.endCurrentLogSegment(false); 1009 LOG.debug("ending log segment because of END_LOG_SEGMENT op in " 1010 + stream); 1011 segmentOpen = false; 1012 } 1013 } 1014 1015 if (segmentOpen) { 1016 LOG.debug("ending log segment because of end of stream in " + stream); 1017 newSharedEditLog.logSync(); 1018 newSharedEditLog.endCurrentLogSegment(false); 1019 segmentOpen = false; 1020 } 1021 } 1022 } finally { 1023 if (streams != null) { 1024 FSEditLog.closeAllStreams(streams); 1025 } 1026 } 1027 } 1028 1029 private static boolean finalize(Configuration conf, 1030 boolean isConfirmationNeeded 1031 ) throws IOException { 1032 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1033 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1034 initializeGenericKeys(conf, nsId, namenodeId); 1035 1036 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf)); 1037 System.err.print( 1038 "\"finalize\" will remove the previous state of the files system.\n" 1039 + "Recent upgrade will become permanent.\n" 1040 + "Rollback option will not be available anymore.\n"); 1041 if (isConfirmationNeeded) { 1042 if (!confirmPrompt("Finalize filesystem state?")) { 1043 System.err.println("Finalize aborted."); 1044 return true; 1045 } 1046 } 1047 nsys.dir.fsImage.finalizeUpgrade(); 1048 return false; 1049 } 1050 1051 private static void printUsage(PrintStream out) { 1052 out.println(USAGE + "\n"); 1053 } 1054 1055 private static StartupOption parseArguments(String args[]) { 1056 int argsLen = (args == null) ? 0 : args.length; 1057 StartupOption startOpt = StartupOption.REGULAR; 1058 for(int i=0; i < argsLen; i++) { 1059 String cmd = args[i]; 1060 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { 1061 startOpt = StartupOption.FORMAT; 1062 for (i = i + 1; i < argsLen; i++) { 1063 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1064 i++; 1065 if (i >= argsLen) { 1066 // if no cluster id specified, return null 1067 LOG.fatal("Must specify a valid cluster ID after the " 1068 + StartupOption.CLUSTERID.getName() + " flag"); 1069 return null; 1070 } 1071 String clusterId = args[i]; 1072 // Make sure an id is specified and not another flag 1073 if (clusterId.isEmpty() || 1074 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) || 1075 clusterId.equalsIgnoreCase( 1076 StartupOption.NONINTERACTIVE.getName())) { 1077 LOG.fatal("Must specify a valid cluster ID after the " 1078 + StartupOption.CLUSTERID.getName() + " flag"); 1079 return null; 1080 } 1081 startOpt.setClusterId(clusterId); 1082 } 1083 1084 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) { 1085 startOpt.setForceFormat(true); 1086 } 1087 1088 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) { 1089 startOpt.setInteractiveFormat(false); 1090 } 1091 } 1092 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) { 1093 startOpt = StartupOption.GENCLUSTERID; 1094 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) { 1095 startOpt = StartupOption.REGULAR; 1096 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) { 1097 startOpt = StartupOption.BACKUP; 1098 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) { 1099 startOpt = StartupOption.CHECKPOINT; 1100 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) { 1101 startOpt = StartupOption.UPGRADE; 1102 // might be followed by two args 1103 if (i + 2 < argsLen 1104 && args[i + 1].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1105 i += 2; 1106 startOpt.setClusterId(args[i]); 1107 } 1108 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) { 1109 startOpt = StartupOption.ROLLBACK; 1110 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) { 1111 startOpt = StartupOption.FINALIZE; 1112 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) { 1113 startOpt = StartupOption.IMPORT; 1114 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) { 1115 startOpt = StartupOption.BOOTSTRAPSTANDBY; 1116 return startOpt; 1117 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { 1118 startOpt = StartupOption.INITIALIZESHAREDEDITS; 1119 for (i = i + 1 ; i < argsLen; i++) { 1120 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) { 1121 startOpt.setInteractiveFormat(false); 1122 } else if (StartupOption.FORCE.getName().equals(args[i])) { 1123 startOpt.setForceFormat(true); 1124 } else { 1125 LOG.fatal("Invalid argument: " + args[i]); 1126 return null; 1127 } 1128 } 1129 return startOpt; 1130 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) { 1131 if (startOpt != StartupOption.REGULAR) { 1132 throw new RuntimeException("Can't combine -recover with " + 1133 "other startup options."); 1134 } 1135 startOpt = StartupOption.RECOVER; 1136 while (++i < argsLen) { 1137 if (args[i].equalsIgnoreCase( 1138 StartupOption.FORCE.getName())) { 1139 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE); 1140 } else { 1141 throw new RuntimeException("Error parsing recovery options: " + 1142 "can't understand option \"" + args[i] + "\""); 1143 } 1144 } 1145 } else { 1146 return null; 1147 } 1148 } 1149 return startOpt; 1150 } 1151 1152 private static void setStartupOption(Configuration conf, StartupOption opt) { 1153 conf.set(DFS_NAMENODE_STARTUP_KEY, opt.toString()); 1154 } 1155 1156 static StartupOption getStartupOption(Configuration conf) { 1157 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY, 1158 StartupOption.REGULAR.toString())); 1159 } 1160 1161 private static void doRecovery(StartupOption startOpt, Configuration conf) 1162 throws IOException { 1163 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1164 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1165 initializeGenericKeys(conf, nsId, namenodeId); 1166 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { 1167 if (!confirmPrompt("You have selected Metadata Recovery mode. " + 1168 "This mode is intended to recover lost metadata on a corrupt " + 1169 "filesystem. Metadata recovery mode often permanently deletes " + 1170 "data from your HDFS filesystem. Please back up your edit log " + 1171 "and fsimage before trying this!\n\n" + 1172 "Are you ready to proceed? (Y/N)\n")) { 1173 System.err.println("Recovery aborted at user request.\n"); 1174 return; 1175 } 1176 } 1177 MetaRecoveryContext.LOG.info("starting recovery..."); 1178 UserGroupInformation.setConfiguration(conf); 1179 NameNode.initMetrics(conf, startOpt.toNodeRole()); 1180 FSNamesystem fsn = null; 1181 try { 1182 fsn = FSNamesystem.loadFromDisk(conf); 1183 fsn.saveNamespace(); 1184 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE"); 1185 } catch (IOException e) { 1186 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1187 throw e; 1188 } catch (RuntimeException e) { 1189 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1190 throw e; 1191 } finally { 1192 if (fsn != null) 1193 fsn.close(); 1194 } 1195 } 1196 1197 public static NameNode createNameNode(String argv[], Configuration conf) 1198 throws IOException { 1199 if (conf == null) 1200 conf = new HdfsConfiguration(); 1201 StartupOption startOpt = parseArguments(argv); 1202 if (startOpt == null) { 1203 printUsage(System.err); 1204 return null; 1205 } 1206 setStartupOption(conf, startOpt); 1207 1208 if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) && 1209 (startOpt == StartupOption.UPGRADE || 1210 startOpt == StartupOption.ROLLBACK || 1211 startOpt == StartupOption.FINALIZE)) { 1212 throw new HadoopIllegalArgumentException("Invalid startup option. " + 1213 "Cannot perform DFS upgrade with HA enabled."); 1214 } 1215 1216 switch (startOpt) { 1217 case FORMAT: { 1218 boolean aborted = format(conf, startOpt.getForceFormat(), 1219 startOpt.getInteractiveFormat()); 1220 terminate(aborted ? 1 : 0); 1221 return null; // avoid javac warning 1222 } 1223 case GENCLUSTERID: { 1224 System.err.println("Generating new cluster id:"); 1225 System.out.println(NNStorage.newClusterID()); 1226 terminate(0); 1227 return null; 1228 } 1229 case FINALIZE: { 1230 boolean aborted = finalize(conf, true); 1231 terminate(aborted ? 1 : 0); 1232 return null; // avoid javac warning 1233 } 1234 case BOOTSTRAPSTANDBY: { 1235 String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length); 1236 int rc = BootstrapStandby.run(toolArgs, conf); 1237 terminate(rc); 1238 return null; // avoid warning 1239 } 1240 case INITIALIZESHAREDEDITS: { 1241 boolean aborted = initializeSharedEdits(conf, 1242 startOpt.getForceFormat(), 1243 startOpt.getInteractiveFormat()); 1244 terminate(aborted ? 1 : 0); 1245 return null; // avoid warning 1246 } 1247 case BACKUP: 1248 case CHECKPOINT: { 1249 NamenodeRole role = startOpt.toNodeRole(); 1250 DefaultMetricsSystem.initialize(role.toString().replace(" ", "")); 1251 return new BackupNode(conf, role); 1252 } 1253 case RECOVER: { 1254 NameNode.doRecovery(startOpt, conf); 1255 return null; 1256 } 1257 default: { 1258 DefaultMetricsSystem.initialize("NameNode"); 1259 return new NameNode(conf); 1260 } 1261 } 1262 } 1263 1264 /** 1265 * In federation configuration is set for a set of 1266 * namenode and secondary namenode/backup/checkpointer, which are 1267 * grouped under a logical nameservice ID. The configuration keys specific 1268 * to them have suffix set to configured nameserviceId. 1269 * 1270 * This method copies the value from specific key of format key.nameserviceId 1271 * to key, to set up the generic configuration. Once this is done, only 1272 * generic version of the configuration is read in rest of the code, for 1273 * backward compatibility and simpler code changes. 1274 * 1275 * @param conf 1276 * Configuration object to lookup specific key and to set the value 1277 * to the key passed. Note the conf object is modified 1278 * @param nameserviceId name service Id (to distinguish federated NNs) 1279 * @param namenodeId the namenode ID (to distinguish HA NNs) 1280 * @see DFSUtil#setGenericConf(Configuration, String, String, String...) 1281 */ 1282 public static void initializeGenericKeys(Configuration conf, 1283 String nameserviceId, String namenodeId) { 1284 if ((nameserviceId != null && !nameserviceId.isEmpty()) || 1285 (namenodeId != null && !namenodeId.isEmpty())) { 1286 if (nameserviceId != null) { 1287 conf.set(DFS_NAMESERVICE_ID, nameserviceId); 1288 } 1289 if (namenodeId != null) { 1290 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId); 1291 } 1292 1293 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId, 1294 NAMENODE_SPECIFIC_KEYS); 1295 DFSUtil.setGenericConf(conf, nameserviceId, null, 1296 NAMESERVICE_SPECIFIC_KEYS); 1297 } 1298 1299 // If the RPC address is set use it to (re-)configure the default FS 1300 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { 1301 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 1302 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); 1303 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); 1304 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString()); 1305 } 1306 } 1307 1308 /** 1309 * Get the name service Id for the node 1310 * @return name service Id or null if federation is not configured 1311 */ 1312 protected String getNameServiceId(Configuration conf) { 1313 return DFSUtil.getNamenodeNameServiceId(conf); 1314 } 1315 1316 /** 1317 */ 1318 public static void main(String argv[]) throws Exception { 1319 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) { 1320 System.exit(0); 1321 } 1322 1323 try { 1324 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG); 1325 NameNode namenode = createNameNode(argv, null); 1326 if (namenode != null) { 1327 namenode.join(); 1328 } 1329 } catch (Throwable e) { 1330 LOG.fatal("Exception in namenode join", e); 1331 terminate(1, e); 1332 } 1333 } 1334 1335 synchronized void monitorHealth() 1336 throws HealthCheckFailedException, AccessControlException { 1337 namesystem.checkSuperuserPrivilege(); 1338 if (!haEnabled) { 1339 return; // no-op, if HA is not enabled 1340 } 1341 getNamesystem().checkAvailableResources(); 1342 if (!getNamesystem().nameNodeHasResourcesAvailable()) { 1343 throw new HealthCheckFailedException( 1344 "The NameNode has no resources available"); 1345 } 1346 } 1347 1348 synchronized void transitionToActive() 1349 throws ServiceFailedException, AccessControlException { 1350 namesystem.checkSuperuserPrivilege(); 1351 if (!haEnabled) { 1352 throw new ServiceFailedException("HA for namenode is not enabled"); 1353 } 1354 state.setState(haContext, ACTIVE_STATE); 1355 } 1356 1357 synchronized void transitionToStandby() 1358 throws ServiceFailedException, AccessControlException { 1359 namesystem.checkSuperuserPrivilege(); 1360 if (!haEnabled) { 1361 throw new ServiceFailedException("HA for namenode is not enabled"); 1362 } 1363 state.setState(haContext, STANDBY_STATE); 1364 } 1365 1366 synchronized HAServiceStatus getServiceStatus() 1367 throws ServiceFailedException, AccessControlException { 1368 namesystem.checkSuperuserPrivilege(); 1369 if (!haEnabled) { 1370 throw new ServiceFailedException("HA for namenode is not enabled"); 1371 } 1372 if (state == null) { 1373 return new HAServiceStatus(HAServiceState.INITIALIZING); 1374 } 1375 HAServiceState retState = state.getServiceState(); 1376 HAServiceStatus ret = new HAServiceStatus(retState); 1377 if (retState == HAServiceState.STANDBY) { 1378 String safemodeTip = namesystem.getSafeModeTip(); 1379 if (!safemodeTip.isEmpty()) { 1380 ret.setNotReadyToBecomeActive( 1381 "The NameNode is in safemode. " + 1382 safemodeTip); 1383 } else { 1384 ret.setReadyToBecomeActive(); 1385 } 1386 } else if (retState == HAServiceState.ACTIVE) { 1387 ret.setReadyToBecomeActive(); 1388 } else { 1389 ret.setNotReadyToBecomeActive("State is " + state); 1390 } 1391 return ret; 1392 } 1393 1394 synchronized HAServiceState getServiceState() { 1395 if (state == null) { 1396 return HAServiceState.INITIALIZING; 1397 } 1398 return state.getServiceState(); 1399 } 1400 1401 /** 1402 * Register NameNodeStatusMXBean 1403 */ 1404 private void registerNNSMXBean() { 1405 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this); 1406 } 1407 1408 @Override // NameNodeStatusMXBean 1409 public String getNNRole() { 1410 String roleStr = ""; 1411 NamenodeRole role = getRole(); 1412 if (null != role) { 1413 roleStr = role.toString(); 1414 } 1415 return roleStr; 1416 } 1417 1418 @Override // NameNodeStatusMXBean 1419 public String getState() { 1420 String servStateStr = ""; 1421 HAServiceState servState = getServiceState(); 1422 if (null != servState) { 1423 servStateStr = servState.toString(); 1424 } 1425 return servStateStr; 1426 } 1427 1428 @Override // NameNodeStatusMXBean 1429 public String getHostAndPort() { 1430 return getNameNodeAddressHostPortString(); 1431 } 1432 1433 @Override // NameNodeStatusMXBean 1434 public boolean isSecurityEnabled() { 1435 return UserGroupInformation.isSecurityEnabled(); 1436 } 1437 1438 /** 1439 * Shutdown the NN immediately in an ungraceful way. Used when it would be 1440 * unsafe for the NN to continue operating, e.g. during a failed HA state 1441 * transition. 1442 * 1443 * @param t exception which warrants the shutdown. Printed to the NN log 1444 * before exit. 1445 * @throws ExitException thrown only for testing. 1446 */ 1447 protected synchronized void doImmediateShutdown(Throwable t) 1448 throws ExitException { 1449 String message = "Error encountered requiring NN shutdown. " + 1450 "Shutting down immediately."; 1451 try { 1452 LOG.fatal(message, t); 1453 } catch (Throwable ignored) { 1454 // This is unlikely to happen, but there's nothing we can do if it does. 1455 } 1456 terminate(1, t); 1457 } 1458 1459 /** 1460 * Class used to expose {@link NameNode} as context to {@link HAState} 1461 */ 1462 protected class NameNodeHAContext implements HAContext { 1463 @Override 1464 public void setState(HAState s) { 1465 state = s; 1466 } 1467 1468 @Override 1469 public HAState getState() { 1470 return state; 1471 } 1472 1473 @Override 1474 public void startActiveServices() throws IOException { 1475 try { 1476 namesystem.startActiveServices(); 1477 startTrashEmptier(conf); 1478 } catch (Throwable t) { 1479 doImmediateShutdown(t); 1480 } 1481 } 1482 1483 @Override 1484 public void stopActiveServices() throws IOException { 1485 try { 1486 if (namesystem != null) { 1487 namesystem.stopActiveServices(); 1488 } 1489 stopTrashEmptier(); 1490 } catch (Throwable t) { 1491 doImmediateShutdown(t); 1492 } 1493 } 1494 1495 @Override 1496 public void startStandbyServices() throws IOException { 1497 try { 1498 namesystem.startStandbyServices(conf); 1499 } catch (Throwable t) { 1500 doImmediateShutdown(t); 1501 } 1502 } 1503 1504 @Override 1505 public void prepareToStopStandbyServices() throws ServiceFailedException { 1506 try { 1507 namesystem.prepareToStopStandbyServices(); 1508 } catch (Throwable t) { 1509 doImmediateShutdown(t); 1510 } 1511 } 1512 1513 @Override 1514 public void stopStandbyServices() throws IOException { 1515 try { 1516 if (namesystem != null) { 1517 namesystem.stopStandbyServices(); 1518 } 1519 } catch (Throwable t) { 1520 doImmediateShutdown(t); 1521 } 1522 } 1523 1524 @Override 1525 public void writeLock() { 1526 namesystem.writeLock(); 1527 } 1528 1529 @Override 1530 public void writeUnlock() { 1531 namesystem.writeUnlock(); 1532 } 1533 1534 /** Check if an operation of given category is allowed */ 1535 @Override 1536 public void checkOperation(final OperationCategory op) 1537 throws StandbyException { 1538 state.checkOperation(haContext, op); 1539 } 1540 1541 @Override 1542 public boolean allowStaleReads() { 1543 return allowStaleStandbyReads; 1544 } 1545 1546 } 1547 1548 public boolean isStandbyState() { 1549 return (state.equals(STANDBY_STATE)); 1550 } 1551 1552 /** 1553 * Check that a request to change this node's HA state is valid. 1554 * In particular, verifies that, if auto failover is enabled, non-forced 1555 * requests from the HAAdmin CLI are rejected, and vice versa. 1556 * 1557 * @param req the request to check 1558 * @throws AccessControlException if the request is disallowed 1559 */ 1560 void checkHaStateChange(StateChangeRequestInfo req) 1561 throws AccessControlException { 1562 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY, 1563 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT); 1564 switch (req.getSource()) { 1565 case REQUEST_BY_USER: 1566 if (autoHaEnabled) { 1567 throw new AccessControlException( 1568 "Manual HA control for this NameNode is disallowed, because " + 1569 "automatic HA is enabled."); 1570 } 1571 break; 1572 case REQUEST_BY_USER_FORCED: 1573 if (autoHaEnabled) { 1574 LOG.warn("Allowing manual HA control from " + 1575 Server.getRemoteAddress() + 1576 " even though automatic HA is enabled, because the user " + 1577 "specified the force flag"); 1578 } 1579 break; 1580 case REQUEST_BY_ZKFC: 1581 if (!autoHaEnabled) { 1582 throw new AccessControlException( 1583 "Request from ZK failover controller at " + 1584 Server.getRemoteAddress() + " denied since automatic HA " + 1585 "is not enabled"); 1586 } 1587 break; 1588 } 1589 } 1590 }