/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HAServiceStatus;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.RefreshUserMappingsProtocol;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.tracing.SpanReceiverHost;
import org.apache.hadoop.tracing.TraceAdminProtocol;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.LogManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.management.ObjectName;

import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_AUTO_FAILOVER_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_PORT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_BIND_HOST_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_BIND_HOST_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PLUGINS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_BIND_HOST_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STARTUP_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import static org.apache.hadoop.util.ExitUtil.terminate;
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;

/**********************************************************
 * NameNode serves as both directory namespace manager and
 * "inode table" for the Hadoop DFS. There is a single NameNode
 * running in any DFS deployment. (Well, except when there
 * is a second backup/failover NameNode, or when using federated NameNodes.)
 *
 * The NameNode controls two critical tables:
 *   1) filename->blocksequence (namespace)
 *   2) block->machinelist ("inodes")
 *
 * The first table is stored on disk and is very precious.
 * The second table is rebuilt every time the NameNode comes up.
 *
 * 'NameNode' refers to both this class as well as the 'NameNode server'.
 * The 'FSNamesystem' class actually performs most of the filesystem
 * management. The majority of the 'NameNode' class itself is concerned
 * with exposing the IPC interface and the HTTP server to the outside world,
 * plus some configuration management.
 *
 * NameNode implements the
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
 * allows clients to ask for DFS services.
 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
 * direct use by authors of DFS client code. End-users should instead use the
 * {@link org.apache.hadoop.fs.FileSystem} class.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
 * used by DataNodes that actually store DFS data blocks. These
 * methods are invoked repeatedly and automatically by all the
 * DataNodes in a DFS deployment.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
 * used by secondary namenodes or rebalancing processes to get partial
 * NameNode state, for example partial blocksMap etc.
 **********************************************************/
@InterfaceAudience.Private
public class NameNode implements NameNodeStatusMXBean {
  static {
    HdfsConfiguration.init();
  }
  /**
   * Categories of operations supported by the namenode.
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }

  /**
   * HDFS configuration can have three types of parameters:
   * <ol>
   * <li>Parameters that are common for all the name services in the cluster.</li>
   * <li>Parameters that are specific to a name service. These keys are suffixed
   * with the nameserviceId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1".</li>
   * <li>Parameters that are specific to a single name node. These keys are suffixed
   * with the nameserviceId and namenodeId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1.namenode1".</li>
   * </ol>
   *
   * The keys in this array are specific to a given namenode, and thus may be
   * configured globally, for a nameservice, or for a specific namenode within
   * a nameservice. Operators may specify any of the three forms; the most
   * specific suffix takes precedence.
   */
  public static final String[] NAMENODE_SPECIFIC_KEYS = {
    DFS_NAMENODE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_NAME_DIR_KEY,
    DFS_NAMENODE_EDITS_DIR_KEY,
    DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_DIR_KEY,
    DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
    DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
    DFS_NAMENODE_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_HTTPS_ADDRESS_KEY,
    DFS_NAMENODE_HTTP_BIND_HOST_KEY,
    DFS_NAMENODE_HTTPS_BIND_HOST_KEY,
    DFS_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
    DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
    DFS_NAMENODE_BACKUP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
    DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
    DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY,
    DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY,
    DFS_HA_FENCE_METHODS_KEY,
    DFS_HA_ZKFC_PORT_KEY
  };
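  // Illustration of the precedence described above (hypothetical key values):
  // if "dfs.namenode.rpc-address.ns1.nn1", "dfs.namenode.rpc-address.ns1" and
  // "dfs.namenode.rpc-address" are all set, a namenode configured with
  // nameservice ID "ns1" and namenode ID "nn1" uses the first; the
  // per-nameservice value is consulted next, and the bare key last.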
  /**
   * @see #NAMENODE_SPECIFIC_KEYS
   * These keys are specific to a nameservice, but may not be overridden
   * for a specific namenode.
   */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };

  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | \n\t["
      + StartupOption.CHECKPOINT.getName() + "] | \n\t["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t["
      + StartupOption.UPGRADE.getName() +
        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t["
      + StartupOption.UPGRADEONLY.getName() +
        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t["
      + StartupOption.ROLLBACK.getName() + "] | \n\t["
      + StartupOption.ROLLINGUPGRADE.getName() + " "
      + RollingUpgradeStartupOption.getAllOptionString() + " ] | \n\t["
      + StartupOption.FINALIZE.getName() + "] | \n\t["
      + StartupOption.IMPORT.getName() + "] | \n\t["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t["
      + StartupOption.RECOVER.getName() + " [ "
      + StartupOption.FORCE.getName() + "] ] | \n\t["
      + StartupOption.METADATAVERSION.getName() + " ] "
      + " ]";

  public long getProtocolVersion(String protocol,
      long clientVersion) throws IOException {
    if (protocol.equals(ClientProtocol.class.getName())) {
      return ClientProtocol.versionID;
    } else if (protocol.equals(DatanodeProtocol.class.getName())) {
      return DatanodeProtocol.versionID;
    } else if (protocol.equals(NamenodeProtocol.class.getName())) {
      return NamenodeProtocol.versionID;
    } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())) {
      return RefreshAuthorizationPolicyProtocol.versionID;
    } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())) {
      return RefreshUserMappingsProtocol.versionID;
    } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
      return RefreshCallQueueProtocol.versionID;
    } else if (protocol.equals(GetUserMappingsProtocol.class.getName())) {
      return GetUserMappingsProtocol.versionID;
    } else if (protocol.equals(TraceAdminProtocol.class.getName())) {
      return TraceAdminProtocol.versionID;
    } else {
      throw new IOException("Unknown protocol to name node: " + protocol);
    }
  }

  public static final int DEFAULT_PORT = 8020;
  public static final Logger LOG =
      LoggerFactory.getLogger(NameNode.class.getName());
  public static final Logger stateChangeLog =
      LoggerFactory.getLogger("org.apache.hadoop.hdfs.StateChange");
  public static final Logger blockStateChangeLog =
      LoggerFactory.getLogger("BlockStateChange");
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();

  protected FSNamesystem namesystem;
  protected final Configuration conf;
  protected final NamenodeRole role;
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected final boolean allowStaleStandbyReads;
  private AtomicBoolean started = new AtomicBoolean(false);

  /** httpServer */
  protected NameNodeHttpServer httpServer;
  private Thread emptier;
  /** only used for testing purposes */
  protected boolean stopRequested = false;
  /** Registration information of this name-node */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;

  private NameNodeRpcServer rpcServer;

  private JvmPauseMonitor pauseMonitor;
  private ObjectName nameNodeStatusBeanName;
  SpanReceiverHost spanReceiverHost;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using a logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;
  /** Format a new filesystem. Destroys any filesystem that may already
   * exist at this location. */
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }

  static NameNodeMetrics metrics;
  private static final StartupProgress startupProgress = new StartupProgress();

  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }

  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   *
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  /**
   * Return the service name of the issued delegation token.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() {
    return getClientNamenodeAddress();
  }

  /**
   * Set the namenode address that will be used by clients to access this
   * namenode or name service. This needs to be called before the config
   * is overridden.
   */
  public void setClientNamenodeAddress(Configuration conf) {
    String nnAddr = conf.get(FS_DEFAULT_NAME_KEY);
    if (nnAddr == null) {
      // default fs is not set.
      clientNamenodeAddress = null;
      return;
    }

    LOG.info("{} is {}", FS_DEFAULT_NAME_KEY, nnAddr);
    URI nnUri = URI.create(nnAddr);

    String nnHost = nnUri.getHost();
    if (nnHost == null) {
      clientNamenodeAddress = null;
      return;
    }

    if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) {
      // host name is logical
      clientNamenodeAddress = nnHost;
    } else if (nnUri.getPort() > 0) {
      // physical address with a valid port
      clientNamenodeAddress = nnUri.getAuthority();
    } else {
      // the port is missing or 0. Figure out real bind address later.
      clientNamenodeAddress = null;
      return;
    }
    LOG.info("Clients are to use {} to access"
        + " this namenode/service.", clientNamenodeAddress);
  }
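  // Illustration of the three cases above (hypothetical values): with
  // fs.defaultFS = "hdfs://ns1" where "ns1" is a configured nameservice ID,
  // clients use the logical name "ns1"; with
  // fs.defaultFS = "hdfs://nn1.example.com:8020" they use the authority
  // "nn1.example.com:8020"; if the port is absent or 0, the address is
  // filled in later from the RPC server's actual bind address.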
  /**
   * Get the namenode address to be used by clients.
   * @return nn address
   */
  public String getClientNamenodeAddress() {
    return clientNamenodeAddress;
  }

  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }

  /**
   * Set the configuration property for the service rpc address
   * to address.
   */
  public static void setServiceAddress(Configuration conf,
      String address) {
    LOG.info("Setting ADDRESS {}", address);
    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
  }

  /**
   * Fetches the address for services to use when connecting to the namenode.
   * If the service rpc address is not configured, returns null when
   * {@code fallback} is false, or the default namenode address (used by both
   * clients and services) when it is true.
   * Services here are datanodes, the backup node, and any non-client
   * connection.
   */
  public static InetSocketAddress getServiceAddress(Configuration conf,
      boolean fallback) {
    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
    if (addr == null || addr.isEmpty()) {
      return fallback ? getAddress(conf) : null;
    }
    return getAddress(addr);
  }

  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }

  /**
   * @return address of file system
   */
  public static InetSocketAddress getAddress(URI filesystemURI) {
    String authority = filesystemURI.getAuthority();
    if (authority == null) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s has no authority.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
    }
    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
        filesystemURI.getScheme())) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
          HdfsConstants.HDFS_URI_SCHEME));
    }
    return getAddress(authority);
  }

  public static URI getUri(InetSocketAddress namenode) {
    int port = namenode.getPort();
    String portString = (port == DEFAULT_PORT) ? "" : (":" + port);
    return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
        + namenode.getHostName() + portString);
  }
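  // For example (hypothetical host): getUri() maps host "nn1.example.com" with
  // port 8020 (DEFAULT_PORT) to "hdfs://nn1.example.com", and with port 9000
  // to "hdfs://nn1.example.com:9000" -- the default port is elided.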
  //
  // Common NameNode methods implementation for the active name-node role.
  //
  public NamenodeRole getRole() {
    return role;
  }

  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }

  /**
   * Given a configuration get the address of the service rpc server.
   * If the service rpc is not configured returns null.
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    return NameNode.getServiceAddress(conf, false);
  }

  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }

  /** Given a configuration get the bind host of the service rpc server.
   * If the bind host is not configured returns null.
   */
  protected String getServiceRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /** Given a configuration get the bind host of the client rpc server.
   * If the bind host is not configured returns null.
   */
  protected String getRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /**
   * Modifies the configuration passed to contain the service rpc address
   * setting.
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }

  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }

  /**
   * HTTP server address for binding the endpoint. This method is
   * for use by the NameNode and its derivatives. It may return
   * a different address than the one that should be used by clients to
   * connect to the NameNode. See
   * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY}
   *
   * @param conf the configuration to read the bind host from
   * @return the address the HTTP server should bind to
   */
  protected InetSocketAddress getHttpServerBindAddress(Configuration conf) {
    InetSocketAddress bindAddress = getHttpServerAddress(conf);

    // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the
    // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY.
    final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY);
    if (bindHost != null && !bindHost.isEmpty()) {
      bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort());
    }

    return bindAddress;
  }

  /** @return the NameNode HTTP address. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return NetUtils.createSocketAddr(
        conf.getTrimmed(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }

  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }

  /* optimize ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser which is synch'ed
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }

  /**
   * Login as the configured user for the NameNode.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
  }
  /**
   * Initialize name-node.
   *
   * @param conf the configuration
   */
  protected void initialize(Configuration conf) throws IOException {
    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
      if (intervals != null) {
        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
            intervals);
      }
    }

    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());
    StartupProgressMetrics.register(startupProgress);

    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
    }

    this.spanReceiverHost =
        SpanReceiverHost.get(conf, DFSConfigKeys.DFS_SERVER_HTRACE_PREFIX);

    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    if (clientNamenodeAddress == null) {
      // This is expected for MiniDFSCluster. Set it now using
      // the RPC server's bind address.
      clientNamenodeAddress =
          NetUtils.getHostPortString(rpcServer.getRpcAddress());
      LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
          + " this namenode/service.");
    }
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }

    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();
    metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);

    startCommonServices(conf);
  }
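  // Note the ordering above: for the NAMENODE role the HTTP server is started
  // before the namesystem is loaded, which (presumably) lets the startup
  // progress registered in startHttpServer() be served while the image and
  // edit logs are still being read.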
  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }

  /** Start the services common to active and standby states. */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p : plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }

  private void stopCommonServices() {
    if (rpcServer != null) rpcServer.stop();
    if (namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }
    stopHttpServer();
  }

  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }

    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }

  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }

  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }

  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }
  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul>
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
   * upgrade and create a snapshot of the current file system state</li>
   * <li>{@link StartupOption#UPGRADEONLY UPGRADEONLY} - upgrade the cluster
   * and create a snapshot of the current file system state, then
   * shut down without entering normal service</li>
   * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
   * cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
   * previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via configuration field:
   * <tt>dfs.namenode.startup</tt>
   *
   * The conf will be modified to reflect the actual ports on which
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
   *
   * @param conf configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }

  protected NameNode(Configuration conf, NamenodeRole role)
      throws IOException {
    this.conf = conf;
    this.role = role;
    setClientNamenodeAddress(conf);
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState(getStartupOption(conf));
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
    this.started.set(true);
  }

  protected HAState createHAState(StartupOption startOpt) {
    if (!haEnabled || startOpt == StartupOption.UPGRADE
        || startOpt == StartupOption.UPGRADEONLY) {
      return ACTIVE_STATE;
    } else {
      return STANDBY_STATE;
    }
  }

  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }

  /**
   * Wait for service to finish.
   * (Normally, it runs forever.)
   */
  public void join() {
    try {
      rpcServer.join();
    } catch (InterruptedException ie) {
      LOG.info("Caught interrupted exception ", ie);
    }
  }

  /**
   * Stop all NameNode threads and wait for all to finish.
   */
  public void stop() {
    synchronized (this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
      if (this.spanReceiverHost != null) {
        this.spanReceiverHost.closeReceivers();
      }
    }
  }

  synchronized boolean isStopRequested() {
    return stopRequested;
  }

  /**
   * Is the cluster currently in safe mode?
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }

  /** get FSImage */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.getFSImage();
  }

  /**
   * @return NameNode RPC address
   */
  public InetSocketAddress getNameNodeAddress() {
    return rpcServer.getRpcAddress();
  }

  /**
   * @return NameNode RPC address in "host:port" string form
   */
  public String getNameNodeAddressHostPortString() {
    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
  }

  /**
   * @return NameNode service RPC address if configured, the
   *    NameNode RPC address otherwise
   */
  public InetSocketAddress getServiceRpcAddress() {
    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
  }

  /**
   * @return NameNode HTTP address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpAddress() {
    return httpServer.getHttpAddress();
  }

  /**
   * @return NameNode HTTPS address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpsAddress() {
    return httpServer.getHttpsAddress();
  }
  /**
   * Verify that configured directories exist, then
   * interactively confirm that formatting is desired
   * for each existing directory and format them.
   *
   * @param conf configuration to use
   * @param force if true, format regardless of whether dirs exist
   * @param isInteractive if true, prompt before formatting any existing
   *    directory
   * @return true if formatting was aborted, false otherwise
   * @throws IOException
   */
  private static boolean format(Configuration conf, boolean force,
      boolean isInteractive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    checkAllowFormat(conf);

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
    }

    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
    List<URI> dirsToPrompt = new ArrayList<URI>();
    dirsToPrompt.addAll(nameDirsToFormat);
    dirsToPrompt.addAll(sharedDirs);
    List<URI> editDirsToFormat =
        FSNamesystem.getNamespaceEditsDirs(conf);

    // if clusterID is not provided - see if you can find the current one
    String clusterId = StartupOption.FORMAT.getClusterId();
    if (clusterId == null || clusterId.equals("")) {
      // Generate a new cluster id
      clusterId = NNStorage.newClusterID();
    }
    System.out.println("Formatting using clusterid: " + clusterId);

    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
    try {
      FSNamesystem fsn = new FSNamesystem(conf, fsImage);
      fsImage.getEditLog().initJournalsForWrite();

      if (!fsImage.confirmFormat(force, isInteractive)) {
        return true; // aborted
      }

      fsImage.format(fsn, clusterId);
    } catch (IOException ioe) {
      LOG.warn("Encountered exception during format: ", ioe);
      fsImage.close();
      throw ioe;
    }
    return false;
  }
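  // A cluster ID ties all storage directories of one cluster together. If no
  // -clusterid argument was given, NNStorage.newClusterID() generates a fresh
  // one (cluster IDs look like "CID-<uuid>"), so formatting without the flag
  // always yields a usable ID.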
  public static void checkAllowFormat(Configuration conf) throws IOException {
    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
          + " is set to false for this filesystem, so it "
          + "cannot be formatted. You will need to set the "
          + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY + " parameter "
          + "to true in order to format this filesystem");
    }
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }

  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }

  /**
   * Clone the supplied configuration but remove the shared edits dirs.
   *
   * @param conf Supplies the original configuration.
   * @return Cloned configuration without the shared edit dirs.
   * @throws IOException on failure to generate the configuration.
   */
  private static Configuration getConfigurationWithoutSharedEdits(
      Configuration conf)
      throws IOException {
    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
    String editsDirsString = Joiner.on(",").join(editsDirs);

    Configuration confWithoutShared = new Configuration(conf);
    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
        editsDirsString);
    return confWithoutShared;
  }
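  // The clone above keeps only the local (non-shared) edits dirs: the false
  // argument to getNamespaceEditsDirs() excludes the shared dirs, and the
  // shared-edits key is unset, so a namesystem loaded from this configuration
  // never touches the shared journal.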
  /**
   * Format a new shared edits dir and copy in enough edit log segments so that
   * the standby NN can start up.
   *
   * @param conf configuration
   * @param force format regardless of whether or not the shared edits dir exists
   * @param interactive prompt the user when a dir exists
   * @return true if the command aborts, false otherwise
   */
  private static boolean initializeSharedEdits(Configuration conf,
      boolean force, boolean interactive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
      LOG.error("No shared edits directory configured for namespace " +
          nsId + " namenode " + namenodeId);
      return false;
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
    }

    NNStorage existingStorage = null;
    FSImage sharedEditsImage = null;
    try {
      FSNamesystem fsns =
          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));

      existingStorage = fsns.getFSImage().getStorage();
      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();

      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);

      sharedEditsImage = new FSImage(conf,
          Lists.<URI>newArrayList(),
          sharedEditsDirs);
      sharedEditsImage.getEditLog().initJournalsForWrite();

      if (!sharedEditsImage.confirmFormat(force, interactive)) {
        return true; // abort
      }

      NNStorage newSharedStorage = sharedEditsImage.getStorage();
      // Call Storage.format instead of FSImage.format here, since we don't
      // actually want to save a checkpoint - just prime the dirs with
      // the existing namespace info
      newSharedStorage.format(nsInfo);
      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);

      // Need to make sure the edit log segments are in good shape to
      // initialize the shared edits dir.
      fsns.getFSImage().getEditLog().close();
      fsns.getFSImage().getEditLog().initJournalsForWrite();
      fsns.getFSImage().getEditLog().recoverUnclosedStreams();

      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
          conf);
    } catch (IOException ioe) {
      LOG.error("Could not initialize shared edits dir", ioe);
      return true; // aborted
    } finally {
      if (sharedEditsImage != null) {
        try {
          sharedEditsImage.close();
        } catch (IOException ioe) {
          LOG.warn("Could not close sharedEditsImage", ioe);
        }
      }
      // Have to unlock storage explicitly for the case when we're running in a
      // unit test, which runs in the same JVM as NNs.
      if (existingStorage != null) {
        try {
          existingStorage.unlockAll();
        } catch (IOException ioe) {
          LOG.warn("Could not unlock storage directories", ioe);
          return true; // aborted
        }
      }
    }
    return false; // did not abort
  }

  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();

    FSEditLog sourceEditLog = fsns.getFSImage().editLog;

    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();

    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
  @VisibleForTesting
  public static boolean doRollback(Configuration conf,
      boolean isConfirmationNeeded) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
    System.err.print(
        "\"rollBack\" will remove the current state of the file system,\n"
        + "returning you to the state prior to initiating your recent\n"
        + "upgrade. This action is permanent and cannot be undone. If you\n"
        + "are performing a rollback in an HA environment, you should be\n"
        + "certain that no NameNode process is running on any host.");
    if (isConfirmationNeeded) {
      if (!confirmPrompt("Roll back file system state?")) {
        System.err.println("Rollback aborted.");
        return true;
      }
    }
    nsys.getFSImage().doRollback(nsys);
    return false;
  }

  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }
  @VisibleForTesting
  static StartupOption parseArguments(String args[]) {
    int argsLen = (args == null) ? 0 : args.length;
    StartupOption startOpt = StartupOption.REGULAR;
    for (int i = 0; i < argsLen; i++) {
      String cmd = args[i];
      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FORMAT;
        for (i = i + 1; i < argsLen; i++) {
          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            i++;
            if (i >= argsLen) {
              // if no cluster id specified, return null
              LOG.error("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            String clusterId = args[i];
            // Make sure an id is specified and not another flag
            if (clusterId.isEmpty() ||
                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
                clusterId.equalsIgnoreCase(
                    StartupOption.NONINTERACTIVE.getName())) {
              LOG.error("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            startOpt.setClusterId(clusterId);
          }

          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
            startOpt.setForceFormat(true);
          }

          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
            startOpt.setInteractiveFormat(false);
          }
        }
      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.GENCLUSTERID;
      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.REGULAR;
      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BACKUP;
      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.CHECKPOINT;
      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)
          || StartupOption.UPGRADEONLY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) ?
            StartupOption.UPGRADE : StartupOption.UPGRADEONLY;
        /* Can be followed by CLUSTERID with a required parameter or
         * RENAMERESERVED with an optional parameter
         */
        while (i + 1 < argsLen) {
          String flag = args[i + 1];
          if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            if (i + 2 < argsLen) {
              i += 2;
              startOpt.setClusterId(args[i]);
            } else {
              LOG.error("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
          } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
              .getName())) {
            if (i + 2 < argsLen) {
              FSImageFormat.setRenameReservedPairs(args[i + 2]);
              i += 2;
            } else {
              FSImageFormat.useDefaultRenameReservedPairs();
              i += 1;
            }
          } else {
            LOG.error("Unknown upgrade flag " + flag);
            return null;
          }
        }
      } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLINGUPGRADE;
        ++i;
        if (i >= argsLen) {
          LOG.error("Must specify a rolling upgrade startup option "
              + RollingUpgradeStartupOption.getAllOptionString());
          return null;
        }
        startOpt.setRollingUpgradeStartupOption(args[i]);
      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLBACK;
      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FINALIZE;
      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.IMPORT;
      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BOOTSTRAPSTANDBY;
        return startOpt;
      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.INITIALIZESHAREDEDITS;
        for (i = i + 1; i < argsLen; i++) {
          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
            startOpt.setInteractiveFormat(false);
          } else if (StartupOption.FORCE.getName().equals(args[i])) {
            startOpt.setForceFormat(true);
          } else {
            LOG.error("Invalid argument: " + args[i]);
            return null;
          }
        }
        return startOpt;
      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
        if (startOpt != StartupOption.REGULAR) {
          throw new RuntimeException("Can't combine -recover with " +
              "other startup options.");
        }
        startOpt = StartupOption.RECOVER;
        while (++i < argsLen) {
          if (args[i].equalsIgnoreCase(
              StartupOption.FORCE.getName())) {
            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
          } else {
            throw new RuntimeException("Error parsing recovery options: " +
                "can't understand option \"" + args[i] + "\"");
          }
        }
      } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.METADATAVERSION;
      } else {
        return null;
      }
    }
    return startOpt;
  }
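  // Example (illustrative command line): "-format -clusterid CID -nonInteractive"
  // is parsed to StartupOption.FORMAT with clusterId "CID" and interactive
  // formatting disabled; an unrecognized token makes parseArguments() return
  // null, which causes the caller to print the usage text.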
  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }

  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
        StartupOption.REGULAR.toString()));
  }

  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode. " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem. Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem. Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }

  /**
   * Verify that configured directories exist, then print the metadata versions
   * of the software and the image.
   *
   * @param conf configuration to use
   * @throws IOException
   */
  private static boolean printMetadataVersion(Configuration conf)
      throws IOException {
    final String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    final String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    NameNode.initializeGenericKeys(conf, nsId, namenodeId);
    final FSImage fsImage = new FSImage(conf);
    final FSNamesystem fs = new FSNamesystem(conf, fsImage, false);
    return fsImage.recoverTransitionRead(
        StartupOption.METADATAVERSION, fs, null);
  }
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    // Parse out some generic args into Configuration.
    GenericOptionsParser hParser = new GenericOptionsParser(conf, argv);
    argv = hParser.getRemainingArgs();
    // Parse the rest, NN specific args.
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      case METADATAVERSION: {
        printMetadataVersion(conf);
        terminate(0);
        return null; // avoid javac warning
      }
      case UPGRADEONLY: {
        DefaultMetricsSystem.initialize("NameNode");
        new NameNode(conf);
        terminate(0);
        return null;
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
  /**
   * In a federated setup, configuration is defined for a set of namenode and
   * secondary namenode/backup/checkpointer daemons, which are grouped under a
   * logical nameservice ID. The configuration keys specific to them carry a
   * suffix of the configured nameserviceId.
   *
   * This method copies the value from the specific key of the form
   * key.nameserviceId to the plain key, to set up the generic configuration.
   * Once this is done, only the generic version of the configuration is read
   * in the rest of the code, for backward compatibility and simpler code
   * changes.
   *
   * @param conf
   *          Configuration object to look up the specific key in and to set
   *          the generic key on. Note that the conf object is modified.
   * @param nameserviceId name service Id (to distinguish federated NNs)
   * @param namenodeId the namenode ID (to distinguish HA NNs)
   * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
   */
  public static void initializeGenericKeys(Configuration conf,
      String nameserviceId, String namenodeId) {
    if ((nameserviceId != null && !nameserviceId.isEmpty()) ||
        (namenodeId != null && !namenodeId.isEmpty())) {
      if (nameserviceId != null) {
        conf.set(DFS_NAMESERVICE_ID, nameserviceId);
      }
      if (namenodeId != null) {
        conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
      }

      DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
          NAMENODE_SPECIFIC_KEYS);
      DFSUtil.setGenericConf(conf, nameserviceId, null,
          NAMESERVICE_SPECIFIC_KEYS);
    }

    // If the RPC address is set, use it to (re-)configure the default FS
    if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
      URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
          + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
      conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
    }
  }

  /**
   * Get the name service Id for the node
   * @return name service Id or null if federation is not configured
   */
  protected String getNameServiceId(Configuration conf) {
    return DFSUtil.getNamenodeNameServiceId(conf);
  }

  /**
   * Command-line entry point: print usage for a help argument, otherwise
   * create the NameNode (or run the requested tool) and wait for it to exit.
   */
  public static void main(String argv[]) throws Exception {
    if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
      System.exit(0);
    }

    try {
      StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
      NameNode namenode = createNameNode(argv, null);
      if (namenode != null) {
        namenode.join();
      }
    } catch (Throwable e) {
      LOG.error("Failed to start namenode.", e);
      terminate(1, e);
    }
  }

  synchronized void monitorHealth()
      throws HealthCheckFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      return; // no-op, if HA is not enabled
    }
    getNamesystem().checkAvailableResources();
    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
      throw new HealthCheckFailedException(
          "The NameNode has no resources available");
    }
  }

  synchronized void transitionToActive()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }

  synchronized void transitionToStandby()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }
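  /*
   * Illustrative sketch, not part of the original source: monitorHealth()
   * above is the health probe that the ZK failover controller drives over
   * HAServiceProtocol. A minimal direct equivalent, assuming a caller with
   * superuser privileges:
   *
   *   try {
   *     nn.monitorHealth(); // throws when NN resources are exhausted
   *   } catch (HealthCheckFailedException e) {
   *     // a failover controller would mark this NameNode unhealthy here
   *   }
   */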
" + 1616 safemodeTip); 1617 } else { 1618 ret.setReadyToBecomeActive(); 1619 } 1620 } else if (retState == HAServiceState.ACTIVE) { 1621 ret.setReadyToBecomeActive(); 1622 } else { 1623 ret.setNotReadyToBecomeActive("State is " + state); 1624 } 1625 return ret; 1626 } 1627 1628 synchronized HAServiceState getServiceState() { 1629 if (state == null) { 1630 return HAServiceState.INITIALIZING; 1631 } 1632 return state.getServiceState(); 1633 } 1634 1635 /** 1636 * Register NameNodeStatusMXBean 1637 */ 1638 private void registerNNSMXBean() { 1639 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this); 1640 } 1641 1642 @Override // NameNodeStatusMXBean 1643 public String getNNRole() { 1644 String roleStr = ""; 1645 NamenodeRole role = getRole(); 1646 if (null != role) { 1647 roleStr = role.toString(); 1648 } 1649 return roleStr; 1650 } 1651 1652 @Override // NameNodeStatusMXBean 1653 public String getState() { 1654 String servStateStr = ""; 1655 HAServiceState servState = getServiceState(); 1656 if (null != servState) { 1657 servStateStr = servState.toString(); 1658 } 1659 return servStateStr; 1660 } 1661 1662 @Override // NameNodeStatusMXBean 1663 public String getHostAndPort() { 1664 return getNameNodeAddressHostPortString(); 1665 } 1666 1667 @Override // NameNodeStatusMXBean 1668 public boolean isSecurityEnabled() { 1669 return UserGroupInformation.isSecurityEnabled(); 1670 } 1671 1672 @Override // NameNodeStatusMXBean 1673 public long getLastHATransitionTime() { 1674 return state.getLastHATransitionTime(); 1675 } 1676 1677 /** 1678 * Shutdown the NN immediately in an ungraceful way. Used when it would be 1679 * unsafe for the NN to continue operating, e.g. during a failed HA state 1680 * transition. 1681 * 1682 * @param t exception which warrants the shutdown. Printed to the NN log 1683 * before exit. 1684 * @throws ExitException thrown only for testing. 1685 */ 1686 protected synchronized void doImmediateShutdown(Throwable t) 1687 throws ExitException { 1688 String message = "Error encountered requiring NN shutdown. " + 1689 "Shutting down immediately."; 1690 try { 1691 LOG.error(message, t); 1692 } catch (Throwable ignored) { 1693 // This is unlikely to happen, but there's nothing we can do if it does. 
  /**
   * Class used to expose {@link NameNode} as context to {@link HAState}
   */
  protected class NameNodeHAContext implements HAContext {
    @Override
    public void setState(HAState s) {
      state = s;
    }

    @Override
    public HAState getState() {
      return state;
    }

    @Override
    public void startActiveServices() throws IOException {
      try {
        namesystem.startActiveServices();
        startTrashEmptier(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopActiveServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopActiveServices();
        }
        stopTrashEmptier();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void startStandbyServices() throws IOException {
      try {
        namesystem.startStandbyServices(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void prepareToStopStandbyServices() throws ServiceFailedException {
      try {
        namesystem.prepareToStopStandbyServices();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopStandbyServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopStandbyServices();
        }
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void writeLock() {
      namesystem.writeLock();
      namesystem.lockRetryCache();
    }

    @Override
    public void writeUnlock() {
      namesystem.unlockRetryCache();
      namesystem.writeUnlock();
    }

    /** Check if an operation of given category is allowed */
    @Override
    public void checkOperation(final OperationCategory op)
        throws StandbyException {
      state.checkOperation(haContext, op);
    }

    @Override
    public boolean allowStaleReads() {
      return allowStaleStandbyReads;
    }

  }

  public boolean isStandbyState() {
    return (state.equals(STANDBY_STATE));
  }

  public boolean isActiveState() {
    return (state.equals(ACTIVE_STATE));
  }

  /**
   * Returns whether the NameNode is completely started
   */
  boolean isStarted() {
    return this.started.get();
  }
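  /*
   * Illustrative sketch, not part of the original source: how the request
   * sources handled by checkHaStateChange() below interact with automatic
   * failover. Hypothetical caller, mirroring the checks an RPC server would
   * perform before an HA transition:
   *
   *   StateChangeRequestInfo req = new StateChangeRequestInfo(
   *       HAServiceProtocol.RequestSource.REQUEST_BY_USER_FORCED);
   *   nn.checkHaStateChange(req); // warns, but permits, when auto-HA is on
   *   nn.transitionToActive();
   */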
  /**
   * Check that a request to change this node's HA state is valid.
   * In particular, verifies that, if auto failover is enabled, non-forced
   * requests from the HAAdmin CLI are rejected, and vice versa.
   *
   * @param req the request to check
   * @throws AccessControlException if the request is disallowed
   */
  void checkHaStateChange(StateChangeRequestInfo req)
      throws AccessControlException {
    boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
        DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
    switch (req.getSource()) {
    case REQUEST_BY_USER:
      if (autoHaEnabled) {
        throw new AccessControlException(
            "Manual HA control for this NameNode is disallowed, because " +
            "automatic HA is enabled.");
      }
      break;
    case REQUEST_BY_USER_FORCED:
      if (autoHaEnabled) {
        LOG.warn("Allowing manual HA control from " +
            Server.getRemoteAddress() +
            " even though automatic HA is enabled, because the user " +
            "specified the force flag");
      }
      break;
    case REQUEST_BY_ZKFC:
      if (!autoHaEnabled) {
        throw new AccessControlException(
            "Request from ZK failover controller at " +
            Server.getRemoteAddress() + " denied since automatic HA " +
            "is not enabled");
      }
      break;
    }
  }
}