001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import com.google.common.annotations.VisibleForTesting; 021import com.google.common.base.Joiner; 022import com.google.common.base.Preconditions; 023import com.google.common.collect.Lists; 024import org.apache.hadoop.HadoopIllegalArgumentException; 025import org.apache.hadoop.classification.InterfaceAudience; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.fs.FileSystem; 028import org.apache.hadoop.fs.Trash; 029import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 030import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; 031import org.apache.hadoop.ha.HAServiceStatus; 032import org.apache.hadoop.ha.HealthCheckFailedException; 033import org.apache.hadoop.ha.ServiceFailedException; 034import org.apache.hadoop.hdfs.DFSConfigKeys; 035import org.apache.hadoop.hdfs.DFSUtil; 036import org.apache.hadoop.hdfs.HAUtil; 037import org.apache.hadoop.hdfs.HdfsConfiguration; 038import org.apache.hadoop.hdfs.protocol.ClientProtocol; 039import org.apache.hadoop.hdfs.protocol.HdfsConstants; 040import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 041import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 042import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 043import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; 044import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby; 045import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 046import org.apache.hadoop.hdfs.server.namenode.ha.HAState; 047import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; 048import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 049import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 050import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; 051import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; 052import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; 053import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; 054import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; 055import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 056import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 057import org.apache.hadoop.ipc.RefreshCallQueueProtocol; 058import org.apache.hadoop.ipc.Server; 059import org.apache.hadoop.ipc.StandbyException; 060import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 061import org.apache.hadoop.metrics2.util.MBeans; 062import org.apache.hadoop.net.NetUtils; 063import org.apache.hadoop.security.AccessControlException; 064import org.apache.hadoop.security.RefreshUserMappingsProtocol; 065import org.apache.hadoop.security.SecurityUtil; 066import org.apache.hadoop.security.UserGroupInformation; 067import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; 068import org.apache.hadoop.tools.GetUserMappingsProtocol; 069import org.apache.hadoop.tracing.SpanReceiverHost; 070import org.apache.hadoop.tracing.TraceAdminProtocol; 071import org.apache.hadoop.util.ExitUtil.ExitException; 072import org.apache.hadoop.util.JvmPauseMonitor; 073import org.apache.hadoop.util.ServicePlugin; 074import org.apache.hadoop.util.StringUtils; 075import org.apache.log4j.LogManager; 076import org.slf4j.Logger; 077import org.slf4j.LoggerFactory; 078 079import javax.management.ObjectName; 080 081import java.io.IOException; 082import java.io.PrintStream; 083import java.net.InetSocketAddress; 084import java.net.URI; 085import java.security.PrivilegedExceptionAction; 086import java.util.ArrayList; 087import java.util.Arrays; 088import java.util.Collection; 089import java.util.List; 090import java.util.concurrent.atomic.AtomicBoolean; 091 092import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 093import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 094import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 095import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT; 096import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_AUTO_FAILOVER_ENABLED_KEY; 097import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY; 098import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY; 099import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_PORT_KEY; 100import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY; 101import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY; 102import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY; 103import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY; 104import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY; 105import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY; 106import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; 107import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY; 108import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_BIND_HOST_KEY; 109import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_DEFAULT; 110import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY; 111import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_BIND_HOST_KEY; 112import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY; 113import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY; 114import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY; 115import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; 116import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PLUGINS_KEY; 117import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; 118import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_BIND_HOST_KEY; 119import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY; 120import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY; 121import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY; 122import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY; 123import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; 124import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STARTUP_KEY; 125import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT; 126import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY; 127import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID; 128import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY; 129import static org.apache.hadoop.hdfs.DFSConfigKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS; 130import static org.apache.hadoop.util.ExitUtil.terminate; 131import static org.apache.hadoop.util.ToolRunner.confirmPrompt; 132 133/********************************************************** 134 * NameNode serves as both directory namespace manager and 135 * "inode table" for the Hadoop DFS. There is a single NameNode 136 * running in any DFS deployment. (Well, except when there 137 * is a second backup/failover NameNode, or when using federated NameNodes.) 138 * 139 * The NameNode controls two critical tables: 140 * 1) filename->blocksequence (namespace) 141 * 2) block->machinelist ("inodes") 142 * 143 * The first table is stored on disk and is very precious. 144 * The second table is rebuilt every time the NameNode comes up. 145 * 146 * 'NameNode' refers to both this class as well as the 'NameNode server'. 147 * The 'FSNamesystem' class actually performs most of the filesystem 148 * management. The majority of the 'NameNode' class itself is concerned 149 * with exposing the IPC interface and the HTTP server to the outside world, 150 * plus some configuration management. 151 * 152 * NameNode implements the 153 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which 154 * allows clients to ask for DFS services. 155 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for 156 * direct use by authors of DFS client code. End-users should instead use the 157 * {@link org.apache.hadoop.fs.FileSystem} class. 158 * 159 * NameNode also implements the 160 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface, 161 * used by DataNodes that actually store DFS data blocks. These 162 * methods are invoked repeatedly and automatically by all the 163 * DataNodes in a DFS deployment. 164 * 165 * NameNode also implements the 166 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface, 167 * used by secondary namenodes or rebalancing processes to get partial 168 * NameNode state, for example partial blocksMap etc. 169 **********************************************************/ 170@InterfaceAudience.Private 171public class NameNode implements NameNodeStatusMXBean { 172 static{ 173 HdfsConfiguration.init(); 174 } 175 176 /** 177 * Categories of operations supported by the namenode. 178 */ 179 public static enum OperationCategory { 180 /** Operations that are state agnostic */ 181 UNCHECKED, 182 /** Read operation that does not change the namespace state */ 183 READ, 184 /** Write operation that changes the namespace state */ 185 WRITE, 186 /** Operations related to checkpointing */ 187 CHECKPOINT, 188 /** Operations related to {@link JournalProtocol} */ 189 JOURNAL 190 } 191 192 /** 193 * HDFS configuration can have three types of parameters: 194 * <ol> 195 * <li>Parameters that are common for all the name services in the cluster.</li> 196 * <li>Parameters that are specific to a name service. These keys are suffixed 197 * with nameserviceId in the configuration. For example, 198 * "dfs.namenode.rpc-address.nameservice1".</li> 199 * <li>Parameters that are specific to a single name node. These keys are suffixed 200 * with nameserviceId and namenodeId in the configuration. for example, 201 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li> 202 * </ol> 203 * 204 * In the latter cases, operators may specify the configuration without 205 * any suffix, with a nameservice suffix, or with a nameservice and namenode 206 * suffix. The more specific suffix will take precedence. 207 * 208 * These keys are specific to a given namenode, and thus may be configured 209 * globally, for a nameservice, or for a specific namenode within a nameservice. 210 */ 211 public static final String[] NAMENODE_SPECIFIC_KEYS = { 212 DFS_NAMENODE_RPC_ADDRESS_KEY, 213 DFS_NAMENODE_RPC_BIND_HOST_KEY, 214 DFS_NAMENODE_NAME_DIR_KEY, 215 DFS_NAMENODE_EDITS_DIR_KEY, 216 DFS_NAMENODE_SHARED_EDITS_DIR_KEY, 217 DFS_NAMENODE_CHECKPOINT_DIR_KEY, 218 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY, 219 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, 220 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, 221 DFS_NAMENODE_HTTP_ADDRESS_KEY, 222 DFS_NAMENODE_HTTPS_ADDRESS_KEY, 223 DFS_NAMENODE_HTTP_BIND_HOST_KEY, 224 DFS_NAMENODE_HTTPS_BIND_HOST_KEY, 225 DFS_NAMENODE_KEYTAB_FILE_KEY, 226 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, 227 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY, 228 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, 229 DFS_NAMENODE_BACKUP_ADDRESS_KEY, 230 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, 231 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY, 232 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, 233 DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY, 234 DFS_HA_FENCE_METHODS_KEY, 235 DFS_HA_ZKFC_PORT_KEY, 236 DFS_HA_FENCE_METHODS_KEY 237 }; 238 239 /** 240 * @see #NAMENODE_SPECIFIC_KEYS 241 * These keys are specific to a nameservice, but may not be overridden 242 * for a specific namenode. 243 */ 244 public static final String[] NAMESERVICE_SPECIFIC_KEYS = { 245 DFS_HA_AUTO_FAILOVER_ENABLED_KEY 246 }; 247 248 private static final String USAGE = "Usage: java NameNode [" 249 + StartupOption.BACKUP.getName() + "] | \n\t[" 250 + StartupOption.CHECKPOINT.getName() + "] | \n\t[" 251 + StartupOption.FORMAT.getName() + " [" 252 + StartupOption.CLUSTERID.getName() + " cid ] [" 253 + StartupOption.FORCE.getName() + "] [" 254 + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t[" 255 + StartupOption.UPGRADE.getName() + 256 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 257 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t[" 258 + StartupOption.UPGRADEONLY.getName() + 259 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 260 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t[" 261 + StartupOption.ROLLBACK.getName() + "] | \n\t[" 262 + StartupOption.ROLLINGUPGRADE.getName() + " " 263 + RollingUpgradeStartupOption.getAllOptionString() + " ] | \n\t[" 264 + StartupOption.FINALIZE.getName() + "] | \n\t[" 265 + StartupOption.IMPORT.getName() + "] | \n\t[" 266 + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t[" 267 + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t[" 268 + StartupOption.RECOVER.getName() + " [ " 269 + StartupOption.FORCE.getName() + "] ] | \n\t[" 270 + StartupOption.METADATAVERSION.getName() + " ] " 271 + " ]"; 272 273 274 public long getProtocolVersion(String protocol, 275 long clientVersion) throws IOException { 276 if (protocol.equals(ClientProtocol.class.getName())) { 277 return ClientProtocol.versionID; 278 } else if (protocol.equals(DatanodeProtocol.class.getName())){ 279 return DatanodeProtocol.versionID; 280 } else if (protocol.equals(NamenodeProtocol.class.getName())){ 281 return NamenodeProtocol.versionID; 282 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){ 283 return RefreshAuthorizationPolicyProtocol.versionID; 284 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){ 285 return RefreshUserMappingsProtocol.versionID; 286 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) { 287 return RefreshCallQueueProtocol.versionID; 288 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ 289 return GetUserMappingsProtocol.versionID; 290 } else if (protocol.equals(TraceAdminProtocol.class.getName())){ 291 return TraceAdminProtocol.versionID; 292 } else { 293 throw new IOException("Unknown protocol to name node: " + protocol); 294 } 295 } 296 297 public static final int DEFAULT_PORT = 8020; 298 public static final Logger LOG = 299 LoggerFactory.getLogger(NameNode.class.getName()); 300 public static final Logger stateChangeLog = 301 LoggerFactory.getLogger("org.apache.hadoop.hdfs.StateChange"); 302 public static final Logger blockStateChangeLog = 303 LoggerFactory.getLogger("BlockStateChange"); 304 public static final HAState ACTIVE_STATE = new ActiveState(); 305 public static final HAState STANDBY_STATE = new StandbyState(); 306 307 protected FSNamesystem namesystem; 308 protected final Configuration conf; 309 protected final NamenodeRole role; 310 private volatile HAState state; 311 private final boolean haEnabled; 312 private final HAContext haContext; 313 protected final boolean allowStaleStandbyReads; 314 private AtomicBoolean started = new AtomicBoolean(false); 315 316 317 /** httpServer */ 318 protected NameNodeHttpServer httpServer; 319 private Thread emptier; 320 /** only used for testing purposes */ 321 protected boolean stopRequested = false; 322 /** Registration information of this name-node */ 323 protected NamenodeRegistration nodeRegistration; 324 /** Activated plug-ins. */ 325 private List<ServicePlugin> plugins; 326 327 private NameNodeRpcServer rpcServer; 328 329 private JvmPauseMonitor pauseMonitor; 330 private ObjectName nameNodeStatusBeanName; 331 SpanReceiverHost spanReceiverHost; 332 /** 333 * The namenode address that clients will use to access this namenode 334 * or the name service. For HA configurations using logical URI, it 335 * will be the logical address. 336 */ 337 private String clientNamenodeAddress; 338 339 /** Format a new filesystem. Destroys any filesystem that may already 340 * exist at this location. **/ 341 public static void format(Configuration conf) throws IOException { 342 format(conf, true, true); 343 } 344 345 static NameNodeMetrics metrics; 346 private static final StartupProgress startupProgress = new StartupProgress(); 347 /** Return the {@link FSNamesystem} object. 348 * @return {@link FSNamesystem} object. 349 */ 350 public FSNamesystem getNamesystem() { 351 return namesystem; 352 } 353 354 public NamenodeProtocols getRpcServer() { 355 return rpcServer; 356 } 357 358 static void initMetrics(Configuration conf, NamenodeRole role) { 359 metrics = NameNodeMetrics.create(conf, role); 360 } 361 362 public static NameNodeMetrics getNameNodeMetrics() { 363 return metrics; 364 } 365 366 /** 367 * Returns object used for reporting namenode startup progress. 368 * 369 * @return StartupProgress for reporting namenode startup progress 370 */ 371 public static StartupProgress getStartupProgress() { 372 return startupProgress; 373 } 374 375 /** 376 * Return the service name of the issued delegation token. 377 * 378 * @return The name service id in HA-mode, or the rpc address in non-HA mode 379 */ 380 public String getTokenServiceName() { 381 return getClientNamenodeAddress(); 382 } 383 384 /** 385 * Set the namenode address that will be used by clients to access this 386 * namenode or name service. This needs to be called before the config 387 * is overriden. 388 */ 389 public void setClientNamenodeAddress(Configuration conf) { 390 String nnAddr = conf.get(FS_DEFAULT_NAME_KEY); 391 if (nnAddr == null) { 392 // default fs is not set. 393 clientNamenodeAddress = null; 394 return; 395 } 396 397 LOG.info("{} is {}", FS_DEFAULT_NAME_KEY, nnAddr); 398 URI nnUri = URI.create(nnAddr); 399 400 String nnHost = nnUri.getHost(); 401 if (nnHost == null) { 402 clientNamenodeAddress = null; 403 return; 404 } 405 406 if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) { 407 // host name is logical 408 clientNamenodeAddress = nnHost; 409 } else if (nnUri.getPort() > 0) { 410 // physical address with a valid port 411 clientNamenodeAddress = nnUri.getAuthority(); 412 } else { 413 // the port is missing or 0. Figure out real bind address later. 414 clientNamenodeAddress = null; 415 return; 416 } 417 LOG.info("Clients are to use {} to access" 418 + " this namenode/service.", clientNamenodeAddress ); 419 } 420 421 /** 422 * Get the namenode address to be used by clients. 423 * @return nn address 424 */ 425 public String getClientNamenodeAddress() { 426 return clientNamenodeAddress; 427 } 428 429 public static InetSocketAddress getAddress(String address) { 430 return NetUtils.createSocketAddr(address, DEFAULT_PORT); 431 } 432 433 /** 434 * Set the configuration property for the service rpc address 435 * to address 436 */ 437 public static void setServiceAddress(Configuration conf, 438 String address) { 439 LOG.info("Setting ADDRESS {}", address); 440 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address); 441 } 442 443 /** 444 * Fetches the address for services to use when connecting to namenode 445 * based on the value of fallback returns null if the special 446 * address is not specified or returns the default namenode address 447 * to be used by both clients and services. 448 * Services here are datanodes, backup node, any non client connection 449 */ 450 public static InetSocketAddress getServiceAddress(Configuration conf, 451 boolean fallback) { 452 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); 453 if (addr == null || addr.isEmpty()) { 454 return fallback ? getAddress(conf) : null; 455 } 456 return getAddress(addr); 457 } 458 459 public static InetSocketAddress getAddress(Configuration conf) { 460 URI filesystemURI = FileSystem.getDefaultUri(conf); 461 return getAddress(filesystemURI); 462 } 463 464 465 /** 466 * @return address of file system 467 */ 468 public static InetSocketAddress getAddress(URI filesystemURI) { 469 String authority = filesystemURI.getAuthority(); 470 if (authority == null) { 471 throw new IllegalArgumentException(String.format( 472 "Invalid URI for NameNode address (check %s): %s has no authority.", 473 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString())); 474 } 475 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase( 476 filesystemURI.getScheme())) { 477 throw new IllegalArgumentException(String.format( 478 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.", 479 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(), 480 HdfsConstants.HDFS_URI_SCHEME)); 481 } 482 return getAddress(authority); 483 } 484 485 public static URI getUri(InetSocketAddress namenode) { 486 int port = namenode.getPort(); 487 String portString = port == DEFAULT_PORT ? "" : (":"+port); 488 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 489 + namenode.getHostName()+portString); 490 } 491 492 // 493 // Common NameNode methods implementation for the active name-node role. 494 // 495 public NamenodeRole getRole() { 496 return role; 497 } 498 499 boolean isRole(NamenodeRole that) { 500 return role.equals(that); 501 } 502 503 /** 504 * Given a configuration get the address of the service rpc server 505 * If the service rpc is not configured returns null 506 */ 507 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { 508 return NameNode.getServiceAddress(conf, false); 509 } 510 511 protected InetSocketAddress getRpcServerAddress(Configuration conf) { 512 return getAddress(conf); 513 } 514 515 /** Given a configuration get the bind host of the service rpc server 516 * If the bind host is not configured returns null. 517 */ 518 protected String getServiceRpcServerBindHost(Configuration conf) { 519 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY); 520 if (addr == null || addr.isEmpty()) { 521 return null; 522 } 523 return addr; 524 } 525 526 /** Given a configuration get the bind host of the client rpc server 527 * If the bind host is not configured returns null. 528 */ 529 protected String getRpcServerBindHost(Configuration conf) { 530 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY); 531 if (addr == null || addr.isEmpty()) { 532 return null; 533 } 534 return addr; 535 } 536 537 /** 538 * Modifies the configuration passed to contain the service rpc address setting 539 */ 540 protected void setRpcServiceServerAddress(Configuration conf, 541 InetSocketAddress serviceRPCAddress) { 542 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress)); 543 } 544 545 protected void setRpcServerAddress(Configuration conf, 546 InetSocketAddress rpcAddress) { 547 FileSystem.setDefaultUri(conf, getUri(rpcAddress)); 548 } 549 550 protected InetSocketAddress getHttpServerAddress(Configuration conf) { 551 return getHttpAddress(conf); 552 } 553 554 /** 555 * HTTP server address for binding the endpoint. This method is 556 * for use by the NameNode and its derivatives. It may return 557 * a different address than the one that should be used by clients to 558 * connect to the NameNode. See 559 * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY} 560 * 561 * @param conf 562 * @return 563 */ 564 protected InetSocketAddress getHttpServerBindAddress(Configuration conf) { 565 InetSocketAddress bindAddress = getHttpServerAddress(conf); 566 567 // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the 568 // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY. 569 final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY); 570 if (bindHost != null && !bindHost.isEmpty()) { 571 bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort()); 572 } 573 574 return bindAddress; 575 } 576 577 /** @return the NameNode HTTP address. */ 578 public static InetSocketAddress getHttpAddress(Configuration conf) { 579 return NetUtils.createSocketAddr( 580 conf.getTrimmed(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT)); 581 } 582 583 protected void loadNamesystem(Configuration conf) throws IOException { 584 this.namesystem = FSNamesystem.loadFromDisk(conf); 585 } 586 587 NamenodeRegistration getRegistration() { 588 return nodeRegistration; 589 } 590 591 NamenodeRegistration setRegistration() { 592 nodeRegistration = new NamenodeRegistration( 593 NetUtils.getHostPortString(rpcServer.getRpcAddress()), 594 NetUtils.getHostPortString(getHttpAddress()), 595 getFSImage().getStorage(), getRole()); 596 return nodeRegistration; 597 } 598 599 /* optimize ugi lookup for RPC operations to avoid a trip through 600 * UGI.getCurrentUser which is synch'ed 601 */ 602 public static UserGroupInformation getRemoteUser() throws IOException { 603 UserGroupInformation ugi = Server.getRemoteUser(); 604 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser(); 605 } 606 607 608 /** 609 * Login as the configured user for the NameNode. 610 */ 611 void loginAsNameNodeUser(Configuration conf) throws IOException { 612 InetSocketAddress socAddr = getRpcServerAddress(conf); 613 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 614 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 615 } 616 617 /** 618 * Initialize name-node. 619 * 620 * @param conf the configuration 621 */ 622 protected void initialize(Configuration conf) throws IOException { 623 if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) { 624 String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY); 625 if (intervals != null) { 626 conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS, 627 intervals); 628 } 629 } 630 631 UserGroupInformation.setConfiguration(conf); 632 loginAsNameNodeUser(conf); 633 634 NameNode.initMetrics(conf, this.getRole()); 635 StartupProgressMetrics.register(startupProgress); 636 637 if (NamenodeRole.NAMENODE == role) { 638 startHttpServer(conf); 639 } 640 641 this.spanReceiverHost = SpanReceiverHost.getInstance(conf); 642 643 loadNamesystem(conf); 644 645 rpcServer = createRpcServer(conf); 646 if (clientNamenodeAddress == null) { 647 // This is expected for MiniDFSCluster. Set it now using 648 // the RPC server's bind address. 649 clientNamenodeAddress = 650 NetUtils.getHostPortString(rpcServer.getRpcAddress()); 651 LOG.info("Clients are to use " + clientNamenodeAddress + " to access" 652 + " this namenode/service."); 653 } 654 if (NamenodeRole.NAMENODE == role) { 655 httpServer.setNameNodeAddress(getNameNodeAddress()); 656 httpServer.setFSImage(getFSImage()); 657 } 658 659 pauseMonitor = new JvmPauseMonitor(conf); 660 pauseMonitor.start(); 661 metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); 662 663 startCommonServices(conf); 664 } 665 666 /** 667 * Create the RPC server implementation. Used as an extension point for the 668 * BackupNode. 669 */ 670 protected NameNodeRpcServer createRpcServer(Configuration conf) 671 throws IOException { 672 return new NameNodeRpcServer(conf, this); 673 } 674 675 /** Start the services common to active and standby states */ 676 private void startCommonServices(Configuration conf) throws IOException { 677 namesystem.startCommonServices(conf, haContext); 678 registerNNSMXBean(); 679 if (NamenodeRole.NAMENODE != role) { 680 startHttpServer(conf); 681 httpServer.setNameNodeAddress(getNameNodeAddress()); 682 httpServer.setFSImage(getFSImage()); 683 } 684 rpcServer.start(); 685 plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, 686 ServicePlugin.class); 687 for (ServicePlugin p: plugins) { 688 try { 689 p.start(this); 690 } catch (Throwable t) { 691 LOG.warn("ServicePlugin " + p + " could not be started", t); 692 } 693 } 694 LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress()); 695 if (rpcServer.getServiceRpcAddress() != null) { 696 LOG.info(getRole() + " service RPC up at: " 697 + rpcServer.getServiceRpcAddress()); 698 } 699 } 700 701 private void stopCommonServices() { 702 if(rpcServer != null) rpcServer.stop(); 703 if(namesystem != null) namesystem.close(); 704 if (pauseMonitor != null) pauseMonitor.stop(); 705 if (plugins != null) { 706 for (ServicePlugin p : plugins) { 707 try { 708 p.stop(); 709 } catch (Throwable t) { 710 LOG.warn("ServicePlugin " + p + " could not be stopped", t); 711 } 712 } 713 } 714 stopHttpServer(); 715 } 716 717 private void startTrashEmptier(final Configuration conf) throws IOException { 718 long trashInterval = 719 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT); 720 if (trashInterval == 0) { 721 return; 722 } else if (trashInterval < 0) { 723 throw new IOException("Cannot start trash emptier with negative interval." 724 + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value."); 725 } 726 727 // This may be called from the transitionToActive code path, in which 728 // case the current user is the administrator, not the NN. The trash 729 // emptier needs to run as the NN. See HDFS-3972. 730 FileSystem fs = SecurityUtil.doAsLoginUser( 731 new PrivilegedExceptionAction<FileSystem>() { 732 @Override 733 public FileSystem run() throws IOException { 734 return FileSystem.get(conf); 735 } 736 }); 737 this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier"); 738 this.emptier.setDaemon(true); 739 this.emptier.start(); 740 } 741 742 private void stopTrashEmptier() { 743 if (this.emptier != null) { 744 emptier.interrupt(); 745 emptier = null; 746 } 747 } 748 749 private void startHttpServer(final Configuration conf) throws IOException { 750 httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf)); 751 httpServer.start(); 752 httpServer.setStartupProgress(startupProgress); 753 } 754 755 private void stopHttpServer() { 756 try { 757 if (httpServer != null) httpServer.stop(); 758 } catch (Exception e) { 759 LOG.error("Exception while stopping httpserver", e); 760 } 761 } 762 763 /** 764 * Start NameNode. 765 * <p> 766 * The name-node can be started with one of the following startup options: 767 * <ul> 768 * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li> 769 * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li> 770 * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li> 771 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li> 772 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster 773 * <li>{@link StartupOption#UPGRADEONLY UPGRADEONLY} - upgrade the cluster 774 * upgrade and create a snapshot of the current file system state</li> 775 * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node 776 * metadata</li> 777 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the 778 * cluster back to the previous state</li> 779 * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 780 * previous upgrade</li> 781 * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li> 782 * </ul> 783 * The option is passed via configuration field: 784 * <tt>dfs.namenode.startup</tt> 785 * 786 * The conf will be modified to reflect the actual ports on which 787 * the NameNode is up and running if the user passes the port as 788 * <code>zero</code> in the conf. 789 * 790 * @param conf confirguration 791 * @throws IOException 792 */ 793 public NameNode(Configuration conf) throws IOException { 794 this(conf, NamenodeRole.NAMENODE); 795 } 796 797 protected NameNode(Configuration conf, NamenodeRole role) 798 throws IOException { 799 this.conf = conf; 800 this.role = role; 801 setClientNamenodeAddress(conf); 802 String nsId = getNameServiceId(conf); 803 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 804 this.haEnabled = HAUtil.isHAEnabled(conf, nsId); 805 state = createHAState(getStartupOption(conf)); 806 this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); 807 this.haContext = createHAContext(); 808 try { 809 initializeGenericKeys(conf, nsId, namenodeId); 810 initialize(conf); 811 try { 812 haContext.writeLock(); 813 state.prepareToEnterState(haContext); 814 state.enterState(haContext); 815 } finally { 816 haContext.writeUnlock(); 817 } 818 } catch (IOException e) { 819 this.stop(); 820 throw e; 821 } catch (HadoopIllegalArgumentException e) { 822 this.stop(); 823 throw e; 824 } 825 this.started.set(true); 826 } 827 828 protected HAState createHAState(StartupOption startOpt) { 829 if (!haEnabled || startOpt == StartupOption.UPGRADE 830 || startOpt == StartupOption.UPGRADEONLY) { 831 return ACTIVE_STATE; 832 } else { 833 return STANDBY_STATE; 834 } 835 } 836 837 protected HAContext createHAContext() { 838 return new NameNodeHAContext(); 839 } 840 841 /** 842 * Wait for service to finish. 843 * (Normally, it runs forever.) 844 */ 845 public void join() { 846 try { 847 rpcServer.join(); 848 } catch (InterruptedException ie) { 849 LOG.info("Caught interrupted exception ", ie); 850 } 851 } 852 853 /** 854 * Stop all NameNode threads and wait for all to finish. 855 */ 856 public void stop() { 857 synchronized(this) { 858 if (stopRequested) 859 return; 860 stopRequested = true; 861 } 862 try { 863 if (state != null) { 864 state.exitState(haContext); 865 } 866 } catch (ServiceFailedException e) { 867 LOG.warn("Encountered exception while exiting state ", e); 868 } finally { 869 stopCommonServices(); 870 if (metrics != null) { 871 metrics.shutdown(); 872 } 873 if (namesystem != null) { 874 namesystem.shutdown(); 875 } 876 if (nameNodeStatusBeanName != null) { 877 MBeans.unregister(nameNodeStatusBeanName); 878 nameNodeStatusBeanName = null; 879 } 880 if (this.spanReceiverHost != null) { 881 this.spanReceiverHost.closeReceivers(); 882 } 883 } 884 } 885 886 synchronized boolean isStopRequested() { 887 return stopRequested; 888 } 889 890 /** 891 * Is the cluster currently in safe mode? 892 */ 893 public boolean isInSafeMode() { 894 return namesystem.isInSafeMode(); 895 } 896 897 /** get FSImage */ 898 @VisibleForTesting 899 public FSImage getFSImage() { 900 return namesystem.getFSImage(); 901 } 902 903 /** 904 * @return NameNode RPC address 905 */ 906 public InetSocketAddress getNameNodeAddress() { 907 return rpcServer.getRpcAddress(); 908 } 909 910 /** 911 * @return NameNode RPC address in "host:port" string form 912 */ 913 public String getNameNodeAddressHostPortString() { 914 return NetUtils.getHostPortString(rpcServer.getRpcAddress()); 915 } 916 917 /** 918 * @return NameNode service RPC address if configured, the 919 * NameNode RPC address otherwise 920 */ 921 public InetSocketAddress getServiceRpcAddress() { 922 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress(); 923 return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr; 924 } 925 926 /** 927 * @return NameNode HTTP address, used by the Web UI, image transfer, 928 * and HTTP-based file system clients like Hftp and WebHDFS 929 */ 930 public InetSocketAddress getHttpAddress() { 931 return httpServer.getHttpAddress(); 932 } 933 934 /** 935 * @return NameNode HTTPS address, used by the Web UI, image transfer, 936 * and HTTP-based file system clients like Hftp and WebHDFS 937 */ 938 public InetSocketAddress getHttpsAddress() { 939 return httpServer.getHttpsAddress(); 940 } 941 942 /** 943 * Verify that configured directories exist, then 944 * Interactively confirm that formatting is desired 945 * for each existing directory and format them. 946 * 947 * @param conf configuration to use 948 * @param force if true, format regardless of whether dirs exist 949 * @return true if formatting was aborted, false otherwise 950 * @throws IOException 951 */ 952 private static boolean format(Configuration conf, boolean force, 953 boolean isInteractive) throws IOException { 954 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 955 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 956 initializeGenericKeys(conf, nsId, namenodeId); 957 checkAllowFormat(conf); 958 959 if (UserGroupInformation.isSecurityEnabled()) { 960 InetSocketAddress socAddr = getAddress(conf); 961 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 962 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 963 } 964 965 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf); 966 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf); 967 List<URI> dirsToPrompt = new ArrayList<URI>(); 968 dirsToPrompt.addAll(nameDirsToFormat); 969 dirsToPrompt.addAll(sharedDirs); 970 List<URI> editDirsToFormat = 971 FSNamesystem.getNamespaceEditsDirs(conf); 972 973 // if clusterID is not provided - see if you can find the current one 974 String clusterId = StartupOption.FORMAT.getClusterId(); 975 if(clusterId == null || clusterId.equals("")) { 976 //Generate a new cluster id 977 clusterId = NNStorage.newClusterID(); 978 } 979 System.out.println("Formatting using clusterid: " + clusterId); 980 981 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat); 982 try { 983 FSNamesystem fsn = new FSNamesystem(conf, fsImage); 984 fsImage.getEditLog().initJournalsForWrite(); 985 986 if (!fsImage.confirmFormat(force, isInteractive)) { 987 return true; // aborted 988 } 989 990 fsImage.format(fsn, clusterId); 991 } catch (IOException ioe) { 992 LOG.warn("Encountered exception during format: ", ioe); 993 fsImage.close(); 994 throw ioe; 995 } 996 return false; 997 } 998 999 public static void checkAllowFormat(Configuration conf) throws IOException { 1000 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 1001 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) { 1002 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY 1003 + " is set to false for this filesystem, so it " 1004 + "cannot be formatted. You will need to set " 1005 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter " 1006 + "to true in order to format this filesystem"); 1007 } 1008 } 1009 1010 @VisibleForTesting 1011 public static boolean initializeSharedEdits(Configuration conf) throws IOException { 1012 return initializeSharedEdits(conf, true); 1013 } 1014 1015 @VisibleForTesting 1016 public static boolean initializeSharedEdits(Configuration conf, 1017 boolean force) throws IOException { 1018 return initializeSharedEdits(conf, force, false); 1019 } 1020 1021 /** 1022 * Clone the supplied configuration but remove the shared edits dirs. 1023 * 1024 * @param conf Supplies the original configuration. 1025 * @return Cloned configuration without the shared edit dirs. 1026 * @throws IOException on failure to generate the configuration. 1027 */ 1028 private static Configuration getConfigurationWithoutSharedEdits( 1029 Configuration conf) 1030 throws IOException { 1031 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false); 1032 String editsDirsString = Joiner.on(",").join(editsDirs); 1033 1034 Configuration confWithoutShared = new Configuration(conf); 1035 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 1036 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, 1037 editsDirsString); 1038 return confWithoutShared; 1039 } 1040 1041 /** 1042 * Format a new shared edits dir and copy in enough edit log segments so that 1043 * the standby NN can start up. 1044 * 1045 * @param conf configuration 1046 * @param force format regardless of whether or not the shared edits dir exists 1047 * @param interactive prompt the user when a dir exists 1048 * @return true if the command aborts, false otherwise 1049 */ 1050 private static boolean initializeSharedEdits(Configuration conf, 1051 boolean force, boolean interactive) throws IOException { 1052 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1053 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1054 initializeGenericKeys(conf, nsId, namenodeId); 1055 1056 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) { 1057 LOG.error("No shared edits directory configured for namespace " + 1058 nsId + " namenode " + namenodeId); 1059 return false; 1060 } 1061 1062 if (UserGroupInformation.isSecurityEnabled()) { 1063 InetSocketAddress socAddr = getAddress(conf); 1064 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 1065 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 1066 } 1067 1068 NNStorage existingStorage = null; 1069 FSImage sharedEditsImage = null; 1070 try { 1071 FSNamesystem fsns = 1072 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf)); 1073 1074 existingStorage = fsns.getFSImage().getStorage(); 1075 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo(); 1076 1077 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); 1078 1079 sharedEditsImage = new FSImage(conf, 1080 Lists.<URI>newArrayList(), 1081 sharedEditsDirs); 1082 sharedEditsImage.getEditLog().initJournalsForWrite(); 1083 1084 if (!sharedEditsImage.confirmFormat(force, interactive)) { 1085 return true; // abort 1086 } 1087 1088 NNStorage newSharedStorage = sharedEditsImage.getStorage(); 1089 // Call Storage.format instead of FSImage.format here, since we don't 1090 // actually want to save a checkpoint - just prime the dirs with 1091 // the existing namespace info 1092 newSharedStorage.format(nsInfo); 1093 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo); 1094 1095 // Need to make sure the edit log segments are in good shape to initialize 1096 // the shared edits dir. 1097 fsns.getFSImage().getEditLog().close(); 1098 fsns.getFSImage().getEditLog().initJournalsForWrite(); 1099 fsns.getFSImage().getEditLog().recoverUnclosedStreams(); 1100 1101 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage, 1102 conf); 1103 } catch (IOException ioe) { 1104 LOG.error("Could not initialize shared edits dir", ioe); 1105 return true; // aborted 1106 } finally { 1107 if (sharedEditsImage != null) { 1108 try { 1109 sharedEditsImage.close(); 1110 } catch (IOException ioe) { 1111 LOG.warn("Could not close sharedEditsImage", ioe); 1112 } 1113 } 1114 // Have to unlock storage explicitly for the case when we're running in a 1115 // unit test, which runs in the same JVM as NNs. 1116 if (existingStorage != null) { 1117 try { 1118 existingStorage.unlockAll(); 1119 } catch (IOException ioe) { 1120 LOG.warn("Could not unlock storage directories", ioe); 1121 return true; // aborted 1122 } 1123 } 1124 } 1125 return false; // did not abort 1126 } 1127 1128 private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns, 1129 Collection<URI> sharedEditsDirs, NNStorage newSharedStorage, 1130 Configuration conf) throws IOException { 1131 Preconditions.checkArgument(!sharedEditsDirs.isEmpty(), 1132 "No shared edits specified"); 1133 // Copy edit log segments into the new shared edits dir. 1134 List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs); 1135 FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage, 1136 sharedEditsUris); 1137 newSharedEditLog.initJournalsForWrite(); 1138 newSharedEditLog.recoverUnclosedStreams(); 1139 1140 FSEditLog sourceEditLog = fsns.getFSImage().editLog; 1141 1142 long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId(); 1143 1144 Collection<EditLogInputStream> streams = null; 1145 try { 1146 streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0); 1147 1148 // Set the nextTxid to the CheckpointTxId+1 1149 newSharedEditLog.setNextTxId(fromTxId + 1); 1150 1151 // Copy all edits after last CheckpointTxId to shared edits dir 1152 for (EditLogInputStream stream : streams) { 1153 LOG.debug("Beginning to copy stream " + stream + " to shared edits"); 1154 FSEditLogOp op; 1155 boolean segmentOpen = false; 1156 while ((op = stream.readOp()) != null) { 1157 if (LOG.isTraceEnabled()) { 1158 LOG.trace("copying op: " + op); 1159 } 1160 if (!segmentOpen) { 1161 newSharedEditLog.startLogSegment(op.txid, false); 1162 segmentOpen = true; 1163 } 1164 1165 newSharedEditLog.logEdit(op); 1166 1167 if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) { 1168 newSharedEditLog.logSync(); 1169 newSharedEditLog.endCurrentLogSegment(false); 1170 LOG.debug("ending log segment because of END_LOG_SEGMENT op in " 1171 + stream); 1172 segmentOpen = false; 1173 } 1174 } 1175 1176 if (segmentOpen) { 1177 LOG.debug("ending log segment because of end of stream in " + stream); 1178 newSharedEditLog.logSync(); 1179 newSharedEditLog.endCurrentLogSegment(false); 1180 segmentOpen = false; 1181 } 1182 } 1183 } finally { 1184 if (streams != null) { 1185 FSEditLog.closeAllStreams(streams); 1186 } 1187 } 1188 } 1189 1190 @VisibleForTesting 1191 public static boolean doRollback(Configuration conf, 1192 boolean isConfirmationNeeded) throws IOException { 1193 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1194 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1195 initializeGenericKeys(conf, nsId, namenodeId); 1196 1197 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf)); 1198 System.err.print( 1199 "\"rollBack\" will remove the current state of the file system,\n" 1200 + "returning you to the state prior to initiating your recent.\n" 1201 + "upgrade. This action is permanent and cannot be undone. If you\n" 1202 + "are performing a rollback in an HA environment, you should be\n" 1203 + "certain that no NameNode process is running on any host."); 1204 if (isConfirmationNeeded) { 1205 if (!confirmPrompt("Roll back file system state?")) { 1206 System.err.println("Rollback aborted."); 1207 return true; 1208 } 1209 } 1210 nsys.getFSImage().doRollback(nsys); 1211 return false; 1212 } 1213 1214 private static void printUsage(PrintStream out) { 1215 out.println(USAGE + "\n"); 1216 } 1217 1218 @VisibleForTesting 1219 static StartupOption parseArguments(String args[]) { 1220 int argsLen = (args == null) ? 0 : args.length; 1221 StartupOption startOpt = StartupOption.REGULAR; 1222 for(int i=0; i < argsLen; i++) { 1223 String cmd = args[i]; 1224 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { 1225 startOpt = StartupOption.FORMAT; 1226 for (i = i + 1; i < argsLen; i++) { 1227 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1228 i++; 1229 if (i >= argsLen) { 1230 // if no cluster id specified, return null 1231 LOG.error("Must specify a valid cluster ID after the " 1232 + StartupOption.CLUSTERID.getName() + " flag"); 1233 return null; 1234 } 1235 String clusterId = args[i]; 1236 // Make sure an id is specified and not another flag 1237 if (clusterId.isEmpty() || 1238 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) || 1239 clusterId.equalsIgnoreCase( 1240 StartupOption.NONINTERACTIVE.getName())) { 1241 LOG.error("Must specify a valid cluster ID after the " 1242 + StartupOption.CLUSTERID.getName() + " flag"); 1243 return null; 1244 } 1245 startOpt.setClusterId(clusterId); 1246 } 1247 1248 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) { 1249 startOpt.setForceFormat(true); 1250 } 1251 1252 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) { 1253 startOpt.setInteractiveFormat(false); 1254 } 1255 } 1256 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) { 1257 startOpt = StartupOption.GENCLUSTERID; 1258 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) { 1259 startOpt = StartupOption.REGULAR; 1260 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) { 1261 startOpt = StartupOption.BACKUP; 1262 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) { 1263 startOpt = StartupOption.CHECKPOINT; 1264 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) 1265 || StartupOption.UPGRADEONLY.getName().equalsIgnoreCase(cmd)) { 1266 startOpt = StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) ? 1267 StartupOption.UPGRADE : StartupOption.UPGRADEONLY; 1268 /* Can be followed by CLUSTERID with a required parameter or 1269 * RENAMERESERVED with an optional parameter 1270 */ 1271 while (i + 1 < argsLen) { 1272 String flag = args[i + 1]; 1273 if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1274 if (i + 2 < argsLen) { 1275 i += 2; 1276 startOpt.setClusterId(args[i]); 1277 } else { 1278 LOG.error("Must specify a valid cluster ID after the " 1279 + StartupOption.CLUSTERID.getName() + " flag"); 1280 return null; 1281 } 1282 } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED 1283 .getName())) { 1284 if (i + 2 < argsLen) { 1285 FSImageFormat.setRenameReservedPairs(args[i + 2]); 1286 i += 2; 1287 } else { 1288 FSImageFormat.useDefaultRenameReservedPairs(); 1289 i += 1; 1290 } 1291 } else { 1292 LOG.error("Unknown upgrade flag " + flag); 1293 return null; 1294 } 1295 } 1296 } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) { 1297 startOpt = StartupOption.ROLLINGUPGRADE; 1298 ++i; 1299 if (i >= argsLen) { 1300 LOG.error("Must specify a rolling upgrade startup option " 1301 + RollingUpgradeStartupOption.getAllOptionString()); 1302 return null; 1303 } 1304 startOpt.setRollingUpgradeStartupOption(args[i]); 1305 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) { 1306 startOpt = StartupOption.ROLLBACK; 1307 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) { 1308 startOpt = StartupOption.FINALIZE; 1309 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) { 1310 startOpt = StartupOption.IMPORT; 1311 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) { 1312 startOpt = StartupOption.BOOTSTRAPSTANDBY; 1313 return startOpt; 1314 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { 1315 startOpt = StartupOption.INITIALIZESHAREDEDITS; 1316 for (i = i + 1 ; i < argsLen; i++) { 1317 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) { 1318 startOpt.setInteractiveFormat(false); 1319 } else if (StartupOption.FORCE.getName().equals(args[i])) { 1320 startOpt.setForceFormat(true); 1321 } else { 1322 LOG.error("Invalid argument: " + args[i]); 1323 return null; 1324 } 1325 } 1326 return startOpt; 1327 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) { 1328 if (startOpt != StartupOption.REGULAR) { 1329 throw new RuntimeException("Can't combine -recover with " + 1330 "other startup options."); 1331 } 1332 startOpt = StartupOption.RECOVER; 1333 while (++i < argsLen) { 1334 if (args[i].equalsIgnoreCase( 1335 StartupOption.FORCE.getName())) { 1336 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE); 1337 } else { 1338 throw new RuntimeException("Error parsing recovery options: " + 1339 "can't understand option \"" + args[i] + "\""); 1340 } 1341 } 1342 } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) { 1343 startOpt = StartupOption.METADATAVERSION; 1344 } else { 1345 return null; 1346 } 1347 } 1348 return startOpt; 1349 } 1350 1351 private static void setStartupOption(Configuration conf, StartupOption opt) { 1352 conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name()); 1353 } 1354 1355 static StartupOption getStartupOption(Configuration conf) { 1356 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY, 1357 StartupOption.REGULAR.toString())); 1358 } 1359 1360 private static void doRecovery(StartupOption startOpt, Configuration conf) 1361 throws IOException { 1362 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1363 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1364 initializeGenericKeys(conf, nsId, namenodeId); 1365 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { 1366 if (!confirmPrompt("You have selected Metadata Recovery mode. " + 1367 "This mode is intended to recover lost metadata on a corrupt " + 1368 "filesystem. Metadata recovery mode often permanently deletes " + 1369 "data from your HDFS filesystem. Please back up your edit log " + 1370 "and fsimage before trying this!\n\n" + 1371 "Are you ready to proceed? (Y/N)\n")) { 1372 System.err.println("Recovery aborted at user request.\n"); 1373 return; 1374 } 1375 } 1376 MetaRecoveryContext.LOG.info("starting recovery..."); 1377 UserGroupInformation.setConfiguration(conf); 1378 NameNode.initMetrics(conf, startOpt.toNodeRole()); 1379 FSNamesystem fsn = null; 1380 try { 1381 fsn = FSNamesystem.loadFromDisk(conf); 1382 fsn.getFSImage().saveNamespace(fsn); 1383 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE"); 1384 } catch (IOException e) { 1385 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1386 throw e; 1387 } catch (RuntimeException e) { 1388 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1389 throw e; 1390 } finally { 1391 if (fsn != null) 1392 fsn.close(); 1393 } 1394 } 1395 1396 /** 1397 * Verify that configured directories exist, then print the metadata versions 1398 * of the software and the image. 1399 * 1400 * @param conf configuration to use 1401 * @throws IOException 1402 */ 1403 private static boolean printMetadataVersion(Configuration conf) 1404 throws IOException { 1405 final String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1406 final String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1407 NameNode.initializeGenericKeys(conf, nsId, namenodeId); 1408 final FSImage fsImage = new FSImage(conf); 1409 final FSNamesystem fs = new FSNamesystem(conf, fsImage, false); 1410 return fsImage.recoverTransitionRead( 1411 StartupOption.METADATAVERSION, fs, null); 1412 } 1413 1414 public static NameNode createNameNode(String argv[], Configuration conf) 1415 throws IOException { 1416 LOG.info("createNameNode " + Arrays.asList(argv)); 1417 if (conf == null) 1418 conf = new HdfsConfiguration(); 1419 StartupOption startOpt = parseArguments(argv); 1420 if (startOpt == null) { 1421 printUsage(System.err); 1422 return null; 1423 } 1424 setStartupOption(conf, startOpt); 1425 1426 switch (startOpt) { 1427 case FORMAT: { 1428 boolean aborted = format(conf, startOpt.getForceFormat(), 1429 startOpt.getInteractiveFormat()); 1430 terminate(aborted ? 1 : 0); 1431 return null; // avoid javac warning 1432 } 1433 case GENCLUSTERID: { 1434 System.err.println("Generating new cluster id:"); 1435 System.out.println(NNStorage.newClusterID()); 1436 terminate(0); 1437 return null; 1438 } 1439 case FINALIZE: { 1440 System.err.println("Use of the argument '" + StartupOption.FINALIZE + 1441 "' is no longer supported. To finalize an upgrade, start the NN " + 1442 " and then run `hdfs dfsadmin -finalizeUpgrade'"); 1443 terminate(1); 1444 return null; // avoid javac warning 1445 } 1446 case ROLLBACK: { 1447 boolean aborted = doRollback(conf, true); 1448 terminate(aborted ? 1 : 0); 1449 return null; // avoid warning 1450 } 1451 case BOOTSTRAPSTANDBY: { 1452 String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length); 1453 int rc = BootstrapStandby.run(toolArgs, conf); 1454 terminate(rc); 1455 return null; // avoid warning 1456 } 1457 case INITIALIZESHAREDEDITS: { 1458 boolean aborted = initializeSharedEdits(conf, 1459 startOpt.getForceFormat(), 1460 startOpt.getInteractiveFormat()); 1461 terminate(aborted ? 1 : 0); 1462 return null; // avoid warning 1463 } 1464 case BACKUP: 1465 case CHECKPOINT: { 1466 NamenodeRole role = startOpt.toNodeRole(); 1467 DefaultMetricsSystem.initialize(role.toString().replace(" ", "")); 1468 return new BackupNode(conf, role); 1469 } 1470 case RECOVER: { 1471 NameNode.doRecovery(startOpt, conf); 1472 return null; 1473 } 1474 case METADATAVERSION: { 1475 printMetadataVersion(conf); 1476 terminate(0); 1477 return null; // avoid javac warning 1478 } 1479 case UPGRADEONLY: { 1480 DefaultMetricsSystem.initialize("NameNode"); 1481 new NameNode(conf); 1482 terminate(0); 1483 return null; 1484 } 1485 default: { 1486 DefaultMetricsSystem.initialize("NameNode"); 1487 return new NameNode(conf); 1488 } 1489 } 1490 } 1491 1492 /** 1493 * In federation configuration is set for a set of 1494 * namenode and secondary namenode/backup/checkpointer, which are 1495 * grouped under a logical nameservice ID. The configuration keys specific 1496 * to them have suffix set to configured nameserviceId. 1497 * 1498 * This method copies the value from specific key of format key.nameserviceId 1499 * to key, to set up the generic configuration. Once this is done, only 1500 * generic version of the configuration is read in rest of the code, for 1501 * backward compatibility and simpler code changes. 1502 * 1503 * @param conf 1504 * Configuration object to lookup specific key and to set the value 1505 * to the key passed. Note the conf object is modified 1506 * @param nameserviceId name service Id (to distinguish federated NNs) 1507 * @param namenodeId the namenode ID (to distinguish HA NNs) 1508 * @see DFSUtil#setGenericConf(Configuration, String, String, String...) 1509 */ 1510 public static void initializeGenericKeys(Configuration conf, 1511 String nameserviceId, String namenodeId) { 1512 if ((nameserviceId != null && !nameserviceId.isEmpty()) || 1513 (namenodeId != null && !namenodeId.isEmpty())) { 1514 if (nameserviceId != null) { 1515 conf.set(DFS_NAMESERVICE_ID, nameserviceId); 1516 } 1517 if (namenodeId != null) { 1518 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId); 1519 } 1520 1521 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId, 1522 NAMENODE_SPECIFIC_KEYS); 1523 DFSUtil.setGenericConf(conf, nameserviceId, null, 1524 NAMESERVICE_SPECIFIC_KEYS); 1525 } 1526 1527 // If the RPC address is set use it to (re-)configure the default FS 1528 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { 1529 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 1530 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); 1531 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); 1532 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString()); 1533 } 1534 } 1535 1536 /** 1537 * Get the name service Id for the node 1538 * @return name service Id or null if federation is not configured 1539 */ 1540 protected String getNameServiceId(Configuration conf) { 1541 return DFSUtil.getNamenodeNameServiceId(conf); 1542 } 1543 1544 /** 1545 */ 1546 public static void main(String argv[]) throws Exception { 1547 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) { 1548 System.exit(0); 1549 } 1550 1551 try { 1552 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG); 1553 NameNode namenode = createNameNode(argv, null); 1554 if (namenode != null) { 1555 namenode.join(); 1556 } 1557 } catch (Throwable e) { 1558 LOG.error("Failed to start namenode.", e); 1559 terminate(1, e); 1560 } 1561 } 1562 1563 synchronized void monitorHealth() 1564 throws HealthCheckFailedException, AccessControlException { 1565 namesystem.checkSuperuserPrivilege(); 1566 if (!haEnabled) { 1567 return; // no-op, if HA is not enabled 1568 } 1569 getNamesystem().checkAvailableResources(); 1570 if (!getNamesystem().nameNodeHasResourcesAvailable()) { 1571 throw new HealthCheckFailedException( 1572 "The NameNode has no resources available"); 1573 } 1574 } 1575 1576 synchronized void transitionToActive() 1577 throws ServiceFailedException, AccessControlException { 1578 namesystem.checkSuperuserPrivilege(); 1579 if (!haEnabled) { 1580 throw new ServiceFailedException("HA for namenode is not enabled"); 1581 } 1582 state.setState(haContext, ACTIVE_STATE); 1583 } 1584 1585 synchronized void transitionToStandby() 1586 throws ServiceFailedException, AccessControlException { 1587 namesystem.checkSuperuserPrivilege(); 1588 if (!haEnabled) { 1589 throw new ServiceFailedException("HA for namenode is not enabled"); 1590 } 1591 state.setState(haContext, STANDBY_STATE); 1592 } 1593 1594 synchronized HAServiceStatus getServiceStatus() 1595 throws ServiceFailedException, AccessControlException { 1596 namesystem.checkSuperuserPrivilege(); 1597 if (!haEnabled) { 1598 throw new ServiceFailedException("HA for namenode is not enabled"); 1599 } 1600 if (state == null) { 1601 return new HAServiceStatus(HAServiceState.INITIALIZING); 1602 } 1603 HAServiceState retState = state.getServiceState(); 1604 HAServiceStatus ret = new HAServiceStatus(retState); 1605 if (retState == HAServiceState.STANDBY) { 1606 String safemodeTip = namesystem.getSafeModeTip(); 1607 if (!safemodeTip.isEmpty()) { 1608 ret.setNotReadyToBecomeActive( 1609 "The NameNode is in safemode. " + 1610 safemodeTip); 1611 } else { 1612 ret.setReadyToBecomeActive(); 1613 } 1614 } else if (retState == HAServiceState.ACTIVE) { 1615 ret.setReadyToBecomeActive(); 1616 } else { 1617 ret.setNotReadyToBecomeActive("State is " + state); 1618 } 1619 return ret; 1620 } 1621 1622 synchronized HAServiceState getServiceState() { 1623 if (state == null) { 1624 return HAServiceState.INITIALIZING; 1625 } 1626 return state.getServiceState(); 1627 } 1628 1629 /** 1630 * Register NameNodeStatusMXBean 1631 */ 1632 private void registerNNSMXBean() { 1633 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this); 1634 } 1635 1636 @Override // NameNodeStatusMXBean 1637 public String getNNRole() { 1638 String roleStr = ""; 1639 NamenodeRole role = getRole(); 1640 if (null != role) { 1641 roleStr = role.toString(); 1642 } 1643 return roleStr; 1644 } 1645 1646 @Override // NameNodeStatusMXBean 1647 public String getState() { 1648 String servStateStr = ""; 1649 HAServiceState servState = getServiceState(); 1650 if (null != servState) { 1651 servStateStr = servState.toString(); 1652 } 1653 return servStateStr; 1654 } 1655 1656 @Override // NameNodeStatusMXBean 1657 public String getHostAndPort() { 1658 return getNameNodeAddressHostPortString(); 1659 } 1660 1661 @Override // NameNodeStatusMXBean 1662 public boolean isSecurityEnabled() { 1663 return UserGroupInformation.isSecurityEnabled(); 1664 } 1665 1666 @Override // NameNodeStatusMXBean 1667 public long getLastHATransitionTime() { 1668 return state.getLastHATransitionTime(); 1669 } 1670 1671 /** 1672 * Shutdown the NN immediately in an ungraceful way. Used when it would be 1673 * unsafe for the NN to continue operating, e.g. during a failed HA state 1674 * transition. 1675 * 1676 * @param t exception which warrants the shutdown. Printed to the NN log 1677 * before exit. 1678 * @throws ExitException thrown only for testing. 1679 */ 1680 protected synchronized void doImmediateShutdown(Throwable t) 1681 throws ExitException { 1682 String message = "Error encountered requiring NN shutdown. " + 1683 "Shutting down immediately."; 1684 try { 1685 LOG.error(message, t); 1686 } catch (Throwable ignored) { 1687 // This is unlikely to happen, but there's nothing we can do if it does. 1688 } 1689 terminate(1, t); 1690 } 1691 1692 /** 1693 * Class used to expose {@link NameNode} as context to {@link HAState} 1694 */ 1695 protected class NameNodeHAContext implements HAContext { 1696 @Override 1697 public void setState(HAState s) { 1698 state = s; 1699 } 1700 1701 @Override 1702 public HAState getState() { 1703 return state; 1704 } 1705 1706 @Override 1707 public void startActiveServices() throws IOException { 1708 try { 1709 namesystem.startActiveServices(); 1710 startTrashEmptier(conf); 1711 } catch (Throwable t) { 1712 doImmediateShutdown(t); 1713 } 1714 } 1715 1716 @Override 1717 public void stopActiveServices() throws IOException { 1718 try { 1719 if (namesystem != null) { 1720 namesystem.stopActiveServices(); 1721 } 1722 stopTrashEmptier(); 1723 } catch (Throwable t) { 1724 doImmediateShutdown(t); 1725 } 1726 } 1727 1728 @Override 1729 public void startStandbyServices() throws IOException { 1730 try { 1731 namesystem.startStandbyServices(conf); 1732 } catch (Throwable t) { 1733 doImmediateShutdown(t); 1734 } 1735 } 1736 1737 @Override 1738 public void prepareToStopStandbyServices() throws ServiceFailedException { 1739 try { 1740 namesystem.prepareToStopStandbyServices(); 1741 } catch (Throwable t) { 1742 doImmediateShutdown(t); 1743 } 1744 } 1745 1746 @Override 1747 public void stopStandbyServices() throws IOException { 1748 try { 1749 if (namesystem != null) { 1750 namesystem.stopStandbyServices(); 1751 } 1752 } catch (Throwable t) { 1753 doImmediateShutdown(t); 1754 } 1755 } 1756 1757 @Override 1758 public void writeLock() { 1759 namesystem.writeLock(); 1760 namesystem.lockRetryCache(); 1761 } 1762 1763 @Override 1764 public void writeUnlock() { 1765 namesystem.unlockRetryCache(); 1766 namesystem.writeUnlock(); 1767 } 1768 1769 /** Check if an operation of given category is allowed */ 1770 @Override 1771 public void checkOperation(final OperationCategory op) 1772 throws StandbyException { 1773 state.checkOperation(haContext, op); 1774 } 1775 1776 @Override 1777 public boolean allowStaleReads() { 1778 return allowStaleStandbyReads; 1779 } 1780 1781 } 1782 1783 public boolean isStandbyState() { 1784 return (state.equals(STANDBY_STATE)); 1785 } 1786 1787 public boolean isActiveState() { 1788 return (state.equals(ACTIVE_STATE)); 1789 } 1790 1791 /** 1792 * Returns whether the NameNode is completely started 1793 */ 1794 boolean isStarted() { 1795 return this.started.get(); 1796 } 1797 1798 /** 1799 * Check that a request to change this node's HA state is valid. 1800 * In particular, verifies that, if auto failover is enabled, non-forced 1801 * requests from the HAAdmin CLI are rejected, and vice versa. 1802 * 1803 * @param req the request to check 1804 * @throws AccessControlException if the request is disallowed 1805 */ 1806 void checkHaStateChange(StateChangeRequestInfo req) 1807 throws AccessControlException { 1808 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY, 1809 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT); 1810 switch (req.getSource()) { 1811 case REQUEST_BY_USER: 1812 if (autoHaEnabled) { 1813 throw new AccessControlException( 1814 "Manual HA control for this NameNode is disallowed, because " + 1815 "automatic HA is enabled."); 1816 } 1817 break; 1818 case REQUEST_BY_USER_FORCED: 1819 if (autoHaEnabled) { 1820 LOG.warn("Allowing manual HA control from " + 1821 Server.getRemoteAddress() + 1822 " even though automatic HA is enabled, because the user " + 1823 "specified the force flag"); 1824 } 1825 break; 1826 case REQUEST_BY_ZKFC: 1827 if (!autoHaEnabled) { 1828 throw new AccessControlException( 1829 "Request from ZK failover controller at " + 1830 Server.getRemoteAddress() + " denied since automatic HA " + 1831 "is not enabled"); 1832 } 1833 break; 1834 } 1835 } 1836}