/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
import static org.apache.hadoop.util.Time.now;

import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import javax.management.NotCompliantMBeanException;
import javax.management.ObjectName;
import javax.management.StandardMBean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
import org.apache.hadoop.fs.CacheFlag;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.*;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.util.ChunkedArrayList;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RetriableException;
import org.apache.hadoop.ipc.RetryCache;
import org.apache.hadoop.ipc.RetryCache.CacheEntry;
import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.VersionInfo;
import org.apache.log4j.Appender;
import org.apache.log4j.AsyncAppender;
import org.apache.log4j.Logger;
import org.mortbay.util.ajax.JSON;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
/***************************************************
 * FSNamesystem does the actual bookkeeping work for the
 * NameNode.
 *
 * It tracks several important tables.
 *
 * 1) valid fsname --> blocklist (kept on disk, logged)
 * 2) Set of all valid blocks (inverted #1)
 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports)
 * 4) machine --> blocklist (inverted #2)
 * 5) LRU cache of updated-heartbeat machines
 ***************************************************/
@InterfaceAudience.Private
@Metrics(context="dfs")
public class FSNamesystem implements Namesystem, FSClusterStats,
    FSNamesystemMBean, NameNodeMXBean {
  public static final Log LOG = LogFactory.getLog(FSNamesystem.class);

  private static final ThreadLocal<StringBuilder> auditBuffer =
      new ThreadLocal<StringBuilder>() {
        @Override
        protected StringBuilder initialValue() {
          return new StringBuilder();
        }
      };

  @VisibleForTesting
  public boolean isAuditEnabled() {
    return !isDefaultAuditLogger || auditLog.isInfoEnabled();
  }

  private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
      throws IOException {
    return (isAuditEnabled() && isExternalInvocation())
        ? dir.getFileInfo(path, resolveSymlink) : null;
  }

  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }

  private void logAuditEvent(boolean succeeded, String cmd, String src,
      String dst, HdfsFileStatus stat) throws IOException {
    if (isAuditEnabled() && isExternalInvocation()) {
      logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
          cmd, src, dst, stat);
    }
  }

  private void logAuditEvent(boolean succeeded,
      UserGroupInformation ugi, InetAddress addr, String cmd, String src,
      String dst, HdfsFileStatus stat) {
    FileStatus status = null;
    if (stat != null) {
      Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
      Path path = dst != null ? new Path(dst) : new Path(src);
      status = new FileStatus(stat.getLen(), stat.isDir(),
          stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
          stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
          stat.getGroup(), symlink, path);
    }
    for (AuditLogger logger : auditLoggers) {
      if (logger instanceof HdfsAuditLogger) {
        HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
        hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
            status, ugi, dtSecretManager);
      } else {
        logger.logAuditEvent(succeeded, ugi.toString(), addr,
            cmd, src, dst, status);
      }
    }
  }

  /**
   * Logger for audit events, noting successful FSNamesystem operations. Emits
   * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
   * <code>key=value</code> pairs to be written for the following properties:
   * <code>
   * ugi=<ugi in RPC>
   * ip=<remote IP>
   * cmd=<command>
   * src=<src path>
   * dst=<dst path (optional)>
   * perm=<permissions (optional)>
   * </code>
   */
  public static final Log auditLog = LogFactory.getLog(
      FSNamesystem.class.getName() + ".audit");
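
  // Illustrative only (user, host and path below are hypothetical): a
  // successful "open" shows up in FSNamesystem.audit roughly as one
  // tab-separated line of the key=value pairs described above, e.g.
  //
  //   ugi=alice   ip=/10.0.0.1   cmd=open   src=/data/file.txt   dst=null   perm=null
  //
  // The exact fields emitted depend on which AuditLogger implementations are
  // configured.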
  static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
  static int BLOCK_DELETION_INCREMENT = 1000;
  private final boolean isPermissionEnabled;
  private final UserGroupInformation fsOwner;
  private final String fsOwnerShortUserName;
  private final String supergroup;
  private final boolean standbyShouldCheckpoint;

  // Scan interval is not configurable.
  private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
      TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
  final DelegationTokenSecretManager dtSecretManager;
  private final boolean alwaysUseDelegationTokensForTests;

  private static final Step STEP_AWAITING_REPORTED_BLOCKS =
      new Step(StepType.AWAITING_REPORTED_BLOCKS);

  // Tracks whether the default audit logger is the only configured audit
  // logger; this allows isAuditEnabled() to return false in case the
  // underlying logger is disabled, and avoid some unnecessary work.
  private final boolean isDefaultAuditLogger;
  private final List<AuditLogger> auditLoggers;

  /** The namespace tree. */
  FSDirectory dir;
  private final BlockManager blockManager;
  private final SnapshotManager snapshotManager;
  private final CacheManager cacheManager;
  private final DatanodeStatistics datanodeStatistics;

  // Block pool ID used by this namenode
  private String blockPoolId;

  final LeaseManager leaseManager = new LeaseManager(this);

  volatile Daemon smmthread = null;  // SafeModeMonitor thread

  Daemon nnrmthread = null; // NamenodeResourceMonitor thread

  Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
  /**
   * When an active namenode will roll its own edit log, in # edits
   */
  private final long editLogRollerThreshold;
  /**
   * Check interval of an active namenode's edit log roller thread
   */
  private final int editLogRollerInterval;

  private volatile boolean hasResourcesAvailable = false;
  private volatile boolean fsRunning = true;

  /** The start time of the namesystem. */
  private final long startTime = now();

  /** The interval of namenode checking for the disk space availability */
  private final long resourceRecheckInterval;

  // The actual resource checker instance.
  NameNodeResourceChecker nnResourceChecker;

  private final FsServerDefaults serverDefaults;
  private final boolean supportAppends;
  private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;

  private volatile SafeModeInfo safeMode;  // safe mode information

  private final long maxFsObjects;     // maximum number of fs objects

  private final long minBlockSize;     // minimum block size
  private final long maxBlocksPerFile; // maximum # of blocks per file

  /**
   * The global generation stamp for legacy blocks with randomly
   * generated block IDs.
   */
  private final GenerationStamp generationStampV1 = new GenerationStamp();

  /**
   * The global generation stamp for this file system.
   */
  private final GenerationStamp generationStampV2 = new GenerationStamp();

  /**
   * The value of the generation stamp when the first switch to sequential
   * block IDs was made. Blocks with generation stamps below this value
   * have randomly allocated block IDs. Blocks with generation stamps above
   * this value had sequentially allocated block IDs. Read from the fsImage
   * (or initialized as an offset from the V1 (legacy) generation stamp on
   * upgrade).
   */
  private long generationStampV1Limit =
      GenerationStamp.GRANDFATHER_GENERATION_STAMP;

  /**
   * The global block ID space for this file system.
   */
  @VisibleForTesting
  private final SequentialBlockIdGenerator blockIdGenerator;

  // precision of access times.
  private final long accessTimePrecision;
  /** Lock to protect FSNamesystem. */
  private FSNamesystemLock fsLock;

  /**
   * Used when this NN is in standby state to read from the shared edit log.
   */
  private EditLogTailer editLogTailer = null;

  /**
   * Used when this NN is in standby state to perform checkpoints.
   */
  private StandbyCheckpointer standbyCheckpointer;

  /**
   * Reference to the NN's HAContext object. This is only set once
   * {@link #startCommonServices(Configuration, HAContext)} is called.
   */
  private HAContext haContext;

  private final boolean haEnabled;

  /**
   * Whether the namenode is in the middle of starting the active service
   */
  private volatile boolean startingActiveService = false;

  private INodeId inodeId;

  private final RetryCache retryCache;

  /**
   * Set the last allocated inode id when fsimage or editlog is loaded.
   */
  public void resetLastInodeId(long newValue) throws IOException {
    try {
      inodeId.skipTo(newValue);
    } catch (IllegalStateException ise) {
      throw new IOException(ise);
    }
  }

  /** Should only be used for tests to reset to any value */
  void resetLastInodeIdWithoutChecking(long newValue) {
    inodeId.setCurrentValue(newValue);
  }

  /** @return the last inode ID. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }

  /** Allocate a new inode ID. */
  public long allocateNewInodeId() {
    return inodeId.nextValue();
  }

  /**
   * Clear all loaded data
   */
  void clear() {
    dir.reset();
    dtSecretManager.reset();
    generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
    generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
    blockIdGenerator.setCurrentValue(
        SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
    generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
    leaseManager.removeAllLeases();
    inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
    snapshotManager.clearSnapshottableDirs();
    cacheManager.clear();
  }

  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }

  /**
   * Check the supplied configuration for correctness.
   * @param conf Supplies the configuration to validate.
   * @throws IOException if the configuration could not be queried.
   * @throws IllegalArgumentException if the configuration is invalid.
   */
  private static void checkConfiguration(Configuration conf)
      throws IOException {

    final Collection<URI> namespaceDirs =
        FSNamesystem.getNamespaceDirs(conf);
    final Collection<URI> editsDirs =
        FSNamesystem.getNamespaceEditsDirs(conf);
    final Collection<URI> requiredEditsDirs =
        FSNamesystem.getRequiredNamespaceEditsDirs(conf);
    final Collection<URI> sharedEditsDirs =
        FSNamesystem.getSharedEditsDirs(conf);

    for (URI u : requiredEditsDirs) {
      if (u.toString().compareTo(
          DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
        continue;
      }

      // Each required directory must also be in editsDirs or in
      // sharedEditsDirs.
      if (!editsDirs.contains(u) &&
          !sharedEditsDirs.contains(u)) {
        throw new IllegalArgumentException(
            "Required edits directory " + u.toString() + " not present in " +
            DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
            DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
            editsDirs.toString() + "; " +
            DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
            requiredEditsDirs.toString() + ". " +
            DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
            sharedEditsDirs.toString() + ".");
      }
    }

    if (namespaceDirs.size() == 1) {
      LOG.warn("Only one image storage directory ("
          + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of dataloss"
          + " due to lack of redundant storage directories!");
    }
    if (editsDirs.size() == 1) {
      LOG.warn("Only one namespace edits storage directory ("
          + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of dataloss"
          + " due to lack of redundant storage directories!");
    }
  }

  /**
   * Instantiates an FSNamesystem loaded from the image and edits
   * directories specified in the passed Configuration.
   *
   * @param conf the Configuration which specifies the storage directories
   *             from which to load
   * @return an FSNamesystem which contains the loaded namespace
   * @throws IOException if loading fails
   */
  public static FSNamesystem loadFromDisk(Configuration conf)
      throws IOException {

    checkConfiguration(conf);
    FSImage fsImage = new FSImage(conf,
        FSNamesystem.getNamespaceDirs(conf),
        FSNamesystem.getNamespaceEditsDirs(conf));
    FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
    StartupOption startOpt = NameNode.getStartupOption(conf);
    if (startOpt == StartupOption.RECOVER) {
      namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    }

    long loadStart = now();
    String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
    try {
      namesystem.loadFSImage(startOpt, fsImage,
          HAUtil.isHAEnabled(conf, nameserviceId));
    } catch (IOException ioe) {
      LOG.warn("Encountered exception loading fsimage", ioe);
      fsImage.close();
      throw ioe;
    }
    long timeTakenToLoadFSImage = now() - loadStart;
    LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
    NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
    if (nnMetrics != null) {
      nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
    }
    return namesystem;
  }

  FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
    this(conf, fsImage, false);
  }
  /**
   * Create an FSNamesystem associated with the specified image.
   *
   * Note that this does not load any data off of disk -- if you would
   * like that behavior, use {@link #loadFromDisk(Configuration)}
   *
   * @param conf configuration
   * @param fsImage The FSImage to associate with
   * @param ignoreRetryCache Whether or not should ignore the retry cache setup
   *                         step. For Secondary NN this should be set to true.
   * @throws IOException on bad configuration
   */
  FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
      throws IOException {
    if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
        DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
      LOG.info("Enabling async auditlog");
      enableAsyncAuditLog();
    }
    boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
    LOG.info("fsLock is fair:" + fair);
    fsLock = new FSNamesystemLock(fair);
    try {
      resourceRecheckInterval = conf.getLong(
          DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
          DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);

      this.blockManager = new BlockManager(this, this, conf);
      this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
      this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);

      this.fsOwner = UserGroupInformation.getCurrentUser();
      this.fsOwnerShortUserName = fsOwner.getShortUserName();
      this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
          DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
      this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
          DFS_PERMISSIONS_ENABLED_DEFAULT);
      LOG.info("fsOwner = " + fsOwner);
      LOG.info("supergroup = " + supergroup);
      LOG.info("isPermissionEnabled = " + isPermissionEnabled);

      // block allocation has to be persisted in HA using a shared edits directory
      // so that the standby has up-to-date namespace information
      String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
      this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);

      // Sanity check the HA-related config.
      if (nameserviceId != null) {
        LOG.info("Determined nameservice ID: " + nameserviceId);
      }
      LOG.info("HA Enabled: " + haEnabled);
      if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
        LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
        throw new IOException("Invalid configuration: a shared edits dir " +
            "must not be specified if HA is not enabled.");
      }

      // Get the checksum type from config
      String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
      DataChecksum.Type checksumType;
      try {
        checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
      } catch (IllegalArgumentException iae) {
        throw new IOException("Invalid checksum type in "
            + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
      }

      this.serverDefaults = new FsServerDefaults(
          conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
          conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
          conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
          (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
          conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
          conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
          conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
          checksumType);

      this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY,
          DFS_NAMENODE_MAX_OBJECTS_DEFAULT);

      this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
          DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
      this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
          DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
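      // Illustrative note (not enforced here): these two limits are checked on
      // the write path elsewhere in this class; a create/addBlock request that
      // uses a block size below minBlockSize, or that would grow a file past
      // maxBlocksPerFile blocks, is rejected with an IOException.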
      this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
          DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
      this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
      LOG.info("Append Enabled: " + supportAppends);

      this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);

      this.standbyShouldCheckpoint = conf.getBoolean(
          DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
      // # edit autoroll threshold is a multiple of the checkpoint threshold
      this.editLogRollerThreshold = (long)
          (conf.getFloat(
              DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
              DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
           conf.getLong(
              DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
              DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
      this.editLogRollerInterval = conf.getInt(
          DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
          DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
      this.inodeId = new INodeId();

      // For testing purposes, allow the DT secret manager to be started regardless
      // of whether security is enabled.
      alwaysUseDelegationTokensForTests = conf.getBoolean(
          DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
          DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);

      this.dtSecretManager = createDelegationTokenSecretManager(conf);
      this.dir = new FSDirectory(fsImage, this, conf);
      this.snapshotManager = new SnapshotManager(dir);
      this.cacheManager = new CacheManager(this, conf, blockManager);
      this.safeMode = new SafeModeInfo(conf);
      this.auditLoggers = initAuditLoggers(conf);
      this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
          auditLoggers.get(0) instanceof DefaultAuditLogger;
      this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
    } catch (IOException e) {
      LOG.error(getClass().getSimpleName() + " initialization failed.", e);
      close();
      throw e;
    } catch (RuntimeException re) {
      LOG.error(getClass().getSimpleName() + " initialization failed.", re);
      close();
      throw re;
    }
  }

  @VisibleForTesting
  public RetryCache getRetryCache() {
    return retryCache;
  }

  /** Whether or not retry cache is enabled */
  boolean hasRetryCache() {
    return retryCache != null;
  }

  void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
    if (retryCache != null) {
      retryCache.addCacheEntryWithPayload(clientId, callId, payload);
    }
  }

  void addCacheEntry(byte[] clientId, int callId) {
    if (retryCache != null) {
      retryCache.addCacheEntry(clientId, callId);
    }
  }

  @VisibleForTesting
  static RetryCache initRetryCache(Configuration conf) {
    boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
        DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
    LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
    if (enable) {
      float heapPercent = conf.getFloat(
          DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
          DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
      long entryExpiryMillis = conf.getLong(
          DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
          DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
      LOG.info("Retry cache will use " + heapPercent
          + " of total heap and retry cache entry expiry time is "
          + entryExpiryMillis + " millis");
      long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
      return new RetryCache("Namenode Retry Cache", heapPercent,
          entryExpiryNanos);
    }
    return null;
  }

  private List<AuditLogger> initAuditLoggers(Configuration conf) {
    // Initialize the custom access loggers if configured.
    Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
    List<AuditLogger> auditLoggers = Lists.newArrayList();
    if (alClasses != null && !alClasses.isEmpty()) {
      for (String className : alClasses) {
        try {
          AuditLogger logger;
          if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
            logger = new DefaultAuditLogger();
          } else {
            logger = (AuditLogger) Class.forName(className).newInstance();
          }
          logger.initialize(conf);
          auditLoggers.add(logger);
        } catch (RuntimeException re) {
          throw re;
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    }

    // Make sure there is at least one logger installed.
    if (auditLoggers.isEmpty()) {
      auditLoggers.add(new DefaultAuditLogger());
    }
    return Collections.unmodifiableList(auditLoggers);
  }
  void loadFSImage(StartupOption startOpt, FSImage fsImage, boolean haEnabled)
      throws IOException {
    // format before starting up if requested
    if (startOpt == StartupOption.FORMAT) {

      fsImage.format(this, fsImage.getStorage().determineClusterId()); // reuse current id

      startOpt = StartupOption.REGULAR;
    }
    boolean success = false;
    writeLock();
    try {
      // We shouldn't be calling saveNamespace if we've come up in standby state.
      MetaRecoveryContext recovery = startOpt.createRecoveryContext();
      boolean needToSave =
          fsImage.recoverTransitionRead(startOpt, this, recovery) && !haEnabled;
      if (needToSave) {
        fsImage.saveNamespace(this);
      } else {
        // No need to save, so mark the phase done.
        StartupProgress prog = NameNode.getStartupProgress();
        prog.beginPhase(Phase.SAVING_CHECKPOINT);
        prog.endPhase(Phase.SAVING_CHECKPOINT);
      }
      // This will start a new log segment and write to the seen_txid file, so
      // we shouldn't do it when coming up in standby state
      if (!haEnabled) {
        fsImage.openEditLogForWrite();
      }
      success = true;
    } finally {
      if (!success) {
        fsImage.close();
      }
      writeUnlock();
    }
    dir.imageLoadComplete();
  }

  private void startSecretManager() {
    if (dtSecretManager != null) {
      try {
        dtSecretManager.startThreads();
      } catch (IOException e) {
        // Inability to start secret manager
        // can't be recovered from.
        throw new RuntimeException(e);
      }
    }
  }

  private void startSecretManagerIfNecessary() {
    boolean shouldRun = shouldUseDelegationTokens() &&
        !isInSafeMode() && getEditLog().isOpenForWrite();
    boolean running = dtSecretManager.isRunning();
    if (shouldRun && !running) {
      startSecretManager();
    }
  }

  private void stopSecretManager() {
    if (dtSecretManager != null) {
      dtSecretManager.stopThreads();
    }
  }

  /**
   * Start services common to both active and standby states
   * @param haContext
   * @throws IOException
   */
  void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
    this.registerMBean(); // register the MBean for the FSNamesystemState
    writeLock();
    this.haContext = haContext;
    try {
      nnResourceChecker = new NameNodeResourceChecker(conf);
      checkAvailableResources();
      assert safeMode != null &&
          !safeMode.isPopulatingReplQueues();
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginPhase(Phase.SAFEMODE);
      prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
          getCompleteBlocksTotal());
      setBlockTotal();
      blockManager.activate(conf);
    } finally {
      writeUnlock();
    }

    registerMXBean();
    DefaultMetricsSystem.instance().register(this);
  }

  /**
   * Stop services common to both active and standby states
   * @throws IOException
   */
  void stopCommonServices() {
    writeLock();
    try {
      if (blockManager != null) blockManager.close();
    } finally {
      writeUnlock();
    }
    RetryCache.clear(retryCache);
  }
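
  // Sketch of how metrics are wired (descriptive only): the @Metrics(context="dfs")
  // annotation on this class, together with the
  // DefaultMetricsSystem.instance().register(this) call in startCommonServices()
  // above, is what publishes the @Metric-annotated getters of this class to the
  // Hadoop metrics2 system under the "dfs" context.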
  /**
   * Start services required in active state
   * @throws IOException
   */
  void startActiveServices() throws IOException {
    startingActiveService = true;
    LOG.info("Starting services required for active state");
    writeLock();
    try {
      FSEditLog editLog = dir.fsImage.getEditLog();

      if (!editLog.isOpenForWrite()) {
        // During startup, we're already open for write during initialization.
        editLog.initJournalsForWrite();
        // May need to recover
        editLog.recoverUnclosedStreams();

        LOG.info("Catching up to latest edits from old active before " +
            "taking over writer role in edits logs");
        editLogTailer.catchupDuringFailover();

        blockManager.setPostponeBlocksFromFuture(false);
        blockManager.getDatanodeManager().markAllDatanodesStale();
        blockManager.clearQueues();
        blockManager.processAllPendingDNMessages();

        if (!isInSafeMode() ||
            (isInSafeMode() && safeMode.isPopulatingReplQueues())) {
          LOG.info("Reprocessing replication and invalidation queues");
          blockManager.processMisReplicatedBlocks();
        }

        if (LOG.isDebugEnabled()) {
          LOG.debug("NameNode metadata after re-processing " +
              "replication and invalidation queues during failover:\n" +
              metaSaveAsString());
        }

        long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
        LOG.info("Will take over writing edit logs at txnid " +
            nextTxId);
        editLog.setNextTxId(nextTxId);

        dir.fsImage.editLog.openForWrite();
      }
      if (haEnabled) {
        // Renew all of the leases before becoming active.
        // This is because, while we were in standby mode,
        // the leases weren't getting renewed on this NN.
        // Give them all a fresh start here.
        leaseManager.renewAllLeases();
      }
      leaseManager.startMonitor();
      startSecretManagerIfNecessary();

      // ResourceMonitor required only at ActiveNN. See HDFS-2914
      this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
      nnrmthread.start();

      nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
          editLogRollerThreshold, editLogRollerInterval));
      nnEditLogRoller.start();

      cacheManager.startMonitorThread();
      blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
    } finally {
      writeUnlock();
      startingActiveService = false;
    }
  }

  /**
   * @return Whether the namenode is transitioning to active state and is in the
   *         middle of the {@link #startActiveServices()}
   */
  public boolean inTransitionToActive() {
    return haEnabled && haContext != null
        && haContext.getState().getServiceState() == HAServiceState.ACTIVE
        && startingActiveService;
  }

  private boolean shouldUseDelegationTokens() {
    return UserGroupInformation.isSecurityEnabled() ||
        alwaysUseDelegationTokensForTests;
  }

  /**
   * Stop services required in active state
   * @throws InterruptedException
   */
  void stopActiveServices() {
    LOG.info("Stopping services started for active state");
    writeLock();
    try {
      stopSecretManager();
      if (leaseManager != null) {
        leaseManager.stopMonitor();
      }
      if (nnrmthread != null) {
        ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
        nnrmthread.interrupt();
      }
      if (nnEditLogRoller != null) {
        ((NameNodeEditLogRoller) nnEditLogRoller.getRunnable()).stop();
        nnEditLogRoller.interrupt();
      }
      if (dir != null && dir.fsImage != null) {
        if (dir.fsImage.editLog != null) {
          dir.fsImage.editLog.close();
        }
        // Update the fsimage with the last txid that we wrote
        // so that the tailer starts from the right spot.
        dir.fsImage.updateLastAppliedTxIdFromWritten();
      }
      cacheManager.stopMonitorThread();
      cacheManager.clearDirectiveStats();
      blockManager.getDatanodeManager().clearPendingCachingCommands();
      blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
    } finally {
      writeUnlock();
    }
  }

  /**
   * Start services required in standby state
   *
   * @throws IOException
   */
  void startStandbyServices(final Configuration conf) throws IOException {
    LOG.info("Starting services required for standby state");
    if (!dir.fsImage.editLog.isOpenForRead()) {
      // During startup, we're already open for read.
      dir.fsImage.editLog.initSharedJournalsForRead();
    }

    blockManager.setPostponeBlocksFromFuture(true);

    editLogTailer = new EditLogTailer(this, conf);
    editLogTailer.start();
    if (standbyShouldCheckpoint) {
      standbyCheckpointer = new StandbyCheckpointer(conf, this);
      standbyCheckpointer.start();
    }
  }
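
  // Illustrative only: in an HA deployment these transitions are normally
  // driven by the failover controller or by an operator (nameservice and NN
  // IDs below are hypothetical), e.g.
  //
  //   hdfs haadmin -transitionToActive nn1
  //   hdfs haadmin -failover nn1 nn2
  //
  // Either path ends up invoking startActiveServices()/stopStandbyServices()
  // (or their standby counterparts) on the affected NameNodes.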
  /**
   * Called while the NN is in Standby state, but just about to be
   * asked to enter Active state. This cancels any checkpoints
   * currently being taken.
   */
  void prepareToStopStandbyServices() throws ServiceFailedException {
    if (standbyCheckpointer != null) {
      standbyCheckpointer.cancelAndPreventCheckpoints(
          "About to leave standby state");
    }
  }

  /** Stop services required in standby state */
  void stopStandbyServices() throws IOException {
    LOG.info("Stopping services started for standby state");
    if (standbyCheckpointer != null) {
      standbyCheckpointer.stop();
    }
    if (editLogTailer != null) {
      editLogTailer.stop();
    }
    if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) {
      dir.fsImage.editLog.close();
    }
  }

  @Override
  public void checkOperation(OperationCategory op) throws StandbyException {
    if (haContext != null) {
      // null in some unit tests
      haContext.checkOperation(op);
    }
  }

  /**
   * @throws RetriableException
   *           If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
   *           NameNode is in active state
   * @throws SafeModeException
   *           Otherwise if NameNode is in SafeMode.
   */
  private void checkNameNodeSafeMode(String errorMsg)
      throws RetriableException, SafeModeException {
    if (isInSafeMode()) {
      SafeModeException se = new SafeModeException(errorMsg, safeMode);
      if (haEnabled && haContext != null
          && haContext.getState().getServiceState() == HAServiceState.ACTIVE
          && shouldRetrySafeMode(this.safeMode)) {
        throw new RetriableException(se);
      } else {
        throw se;
      }
    }
  }

  /**
   * We already know that the safemode is on. We will throw a RetriableException
   * if the safemode is not manual or caused by low resource.
   */
  private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
    if (safeMode == null) {
      return false;
    } else {
      return !safeMode.isManual() && !safeMode.areResourcesLow();
    }
  }
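
  // Illustrative only: safe mode can also be inspected and toggled by an
  // administrator, which exercises the checks above, e.g.
  //
  //   hdfs dfsadmin -safemode get
  //   hdfs dfsadmin -safemode enter
  //   hdfs dfsadmin -safemode leave
  //
  // Per shouldRetrySafeMode(), a manually entered safe mode is not retriable:
  // checkNameNodeSafeMode() then throws SafeModeException rather than
  // RetriableException.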
  public static Collection<URI> getNamespaceDirs(Configuration conf) {
    return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
  }

  /**
   * Get all edits dirs which are required. If any shared edits dirs are
   * configured, these are also included in the set of required dirs.
   *
   * @param conf the HDFS configuration.
   * @return all required dirs.
   */
  public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
    Set<URI> ret = new HashSet<URI>();
    ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
    ret.addAll(getSharedEditsDirs(conf));
    return ret;
  }

  private static Collection<URI> getStorageDirs(Configuration conf,
      String propertyName) {
    Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
    StartupOption startOpt = NameNode.getStartupOption(conf);
    if (startOpt == StartupOption.IMPORT) {
      // In case of IMPORT this will get rid of default directories
      // but will retain directories specified in hdfs-site.xml
      // When importing image from a checkpoint, the name-node can
      // start with empty set of storage directories.
      Configuration cE = new HdfsConfiguration(false);
      cE.addResource("core-default.xml");
      cE.addResource("core-site.xml");
      cE.addResource("hdfs-default.xml");
      Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
      dirNames.removeAll(dirNames2);
      if (dirNames.isEmpty())
        LOG.warn("!!! WARNING !!!" +
            "\n\tThe NameNode currently runs without persistent storage." +
            "\n\tAny changes to the file system meta-data may be lost." +
            "\n\tRecommended actions:" +
            "\n\t\t- shutdown and restart NameNode with configured \""
            + propertyName + "\" in hdfs-site.xml;" +
            "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
            "of the file system meta-data.");
    } else if (dirNames.isEmpty()) {
      dirNames = Collections.singletonList(
          DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
    }
    return Util.stringCollectionAsURIs(dirNames);
  }

  /**
   * Return an ordered list of edits directories to write to.
   * The list is ordered such that all shared edits directories
   * are ordered before non-shared directories, and any duplicates
   * are removed. The order they are specified in the configuration
   * is retained.
   * @return an ordered collection of edits directories.
   * @throws IOException if multiple shared edits directories are configured
   */
  public static List<URI> getNamespaceEditsDirs(Configuration conf)
      throws IOException {
    return getNamespaceEditsDirs(conf, true);
  }

  public static List<URI> getNamespaceEditsDirs(Configuration conf,
      boolean includeShared)
      throws IOException {
    // Use a LinkedHashSet so that order is maintained while we de-dup
    // the entries.
    LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();

    if (includeShared) {
      List<URI> sharedDirs = getSharedEditsDirs(conf);

      // Fail until multiple shared edits directories are supported (HDFS-2782)
      if (sharedDirs.size() > 1) {
        throw new IOException(
            "Multiple shared edits directories are not yet supported");
      }

      // First add the shared edits dirs. It's critical that the shared dirs
      // are added first, since JournalSet syncs them in the order they are listed,
      // and we need to make sure all edits are in place in the shared storage
      // before they are replicated locally. See HDFS-2874.
      for (URI dir : sharedDirs) {
        if (!editsDirs.add(dir)) {
          LOG.warn("Edits URI " + dir + " listed multiple times in " +
              DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
        }
      }
    }
    // Now add the non-shared dirs.
    for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
      if (!editsDirs.add(dir)) {
        LOG.warn("Edits URI " + dir + " listed multiple times in " +
            DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
            DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
      }
    }

    if (editsDirs.isEmpty()) {
      // If this is the case, no edit dirs have been explicitly configured.
      // Image dirs are to be used for edits too.
      return Lists.newArrayList(getNamespaceDirs(conf));
    } else {
      return Lists.newArrayList(editsDirs);
    }
  }
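
  // Illustrative only (URIs below are hypothetical): with
  //
  //   dfs.namenode.shared.edits.dir = qjournal://jn1:8485;jn2:8485;jn3:8485/myns
  //   dfs.namenode.edits.dir        = file:///data/1/nn,file:///data/2/nn
  //
  // getNamespaceEditsDirs() returns the shared journal URI first and the two
  // local directories after it, with any duplicate entries dropped.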
  /**
   * Returns edit directories that are shared between primary and secondary.
   * @param conf
   * @return Collection of edit directories.
   */
  public static List<URI> getSharedEditsDirs(Configuration conf) {
    // don't use getStorageDirs here, because we want an empty default
    // rather than the dir in /tmp
    Collection<String> dirNames = conf.getTrimmedStringCollection(
        DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
    return Util.stringCollectionAsURIs(dirNames);
  }

  @Override
  public void readLock() {
    this.fsLock.readLock().lock();
  }
  @Override
  public void readUnlock() {
    this.fsLock.readLock().unlock();
  }
  @Override
  public void writeLock() {
    this.fsLock.writeLock().lock();
  }
  @Override
  public void writeLockInterruptibly() throws InterruptedException {
    this.fsLock.writeLock().lockInterruptibly();
  }
  @Override
  public void writeUnlock() {
    this.fsLock.writeLock().unlock();
  }
  @Override
  public boolean hasWriteLock() {
    return this.fsLock.isWriteLockedByCurrentThread();
  }
  @Override
  public boolean hasReadLock() {
    return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
  }

  public int getReadHoldCount() {
    return this.fsLock.getReadHoldCount();
  }

  public int getWriteHoldCount() {
    return this.fsLock.getWriteHoldCount();
  }
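
  // The locking convention used throughout this class: take the namesystem
  // lock, do the work, and release in a finally block, e.g.
  //
  //   readLock();
  //   try {
  //     // read-only access to the namespace
  //   } finally {
  //     readUnlock();
  //   }
  //
  // getNamespaceInfo() below is a minimal example of this pattern; mutating
  // operations use writeLock()/writeUnlock() instead.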
  NamespaceInfo getNamespaceInfo() {
    readLock();
    try {
      return unprotectedGetNamespaceInfo();
    } finally {
      readUnlock();
    }
  }

  /**
   * Version of @see #getNamespaceInfo() that is not protected by a lock.
   */
  NamespaceInfo unprotectedGetNamespaceInfo() {
    return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(),
        getClusterId(), getBlockPoolId(),
        dir.fsImage.getStorage().getCTime());
  }

  /**
   * Close down this file system manager.
   * Causes heartbeat and lease daemons to stop; waits briefly for
   * them to finish, but a short timeout returns control back to caller.
   */
  void close() {
    fsRunning = false;
    try {
      stopCommonServices();
      if (smmthread != null) smmthread.interrupt();
    } finally {
      // using finally to ensure we also wait for lease daemon
      try {
        stopActiveServices();
        stopStandbyServices();
        if (dir != null) {
          dir.close();
        }
      } catch (IOException ie) {
        LOG.error("Error closing FSDirectory", ie);
        IOUtils.cleanup(LOG, dir);
      }
    }
  }

  @Override
  public boolean isRunning() {
    return fsRunning;
  }

  @Override
  public boolean isInStandbyState() {
    if (haContext == null || haContext.getState() == null) {
      // We're still starting up. In this case, if HA is
      // on for the cluster, we always start in standby. Otherwise
      // start in active.
      return haEnabled;
    }

    return HAServiceState.STANDBY == haContext.getState().getServiceState();
  }

  /**
   * Dump all metadata into specified file
   */
  void metaSave(String filename) throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      File file = new File(System.getProperty("hadoop.log.dir"), filename);
      PrintWriter out = new PrintWriter(new BufferedWriter(
          new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
      metaSave(out);
      out.flush();
      out.close();
    } finally {
      writeUnlock();
    }
  }

  private void metaSave(PrintWriter out) {
    assert hasWriteLock();
    long totalInodes = this.dir.totalInodes();
    long totalBlocks = this.getBlocksTotal();
    out.println(totalInodes + " files and directories, " + totalBlocks
        + " blocks = " + (totalInodes + totalBlocks) + " total");

    blockManager.metaSave(out);
  }

  private String metaSaveAsString() {
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);
    metaSave(pw);
    pw.flush();
    return sw.toString();
  }

  long getDefaultBlockSize() {
    return serverDefaults.getBlockSize();
  }

  FsServerDefaults getServerDefaults() throws StandbyException {
    checkOperation(OperationCategory.READ);
    return serverDefaults;
  }

  long getAccessTimePrecision() {
    return accessTimePrecision;
  }

  private boolean isAccessTimeSupported() {
    return accessTimePrecision > 0;
  }

  /////////////////////////////////////////////////////////
  //
  // These methods are called by HadoopFS clients
  //
  /////////////////////////////////////////////////////////
  /**
   * Set permissions for an existing file.
   * @throws IOException
   */
  void setPermission(String src, FsPermission permission)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    try {
      setPermissionInt(src, permission);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setPermission", src);
      throw e;
    }
  }

  private void setPermissionInt(String src, FsPermission permission)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set permission for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.setPermission(src, permission);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setPermission", src, null, resultingStat);
  }
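
  // Illustrative client-side view (path is hypothetical): a call such as
  //
  //   FileSystem fs = FileSystem.get(conf);
  //   fs.setPermission(new Path("/user/alice/data"), new FsPermission((short) 0640));
  //
  // reaches the NameNode over ClientProtocol and is served by setPermission()
  // above (and setOwner() below) under the namesystem write lock.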
1474 * @throws IOException 1475 */ 1476 void setOwner(String src, String username, String group) 1477 throws AccessControlException, FileNotFoundException, SafeModeException, 1478 UnresolvedLinkException, IOException { 1479 try { 1480 setOwnerInt(src, username, group); 1481 } catch (AccessControlException e) { 1482 logAuditEvent(false, "setOwner", src); 1483 throw e; 1484 } 1485 } 1486 1487 private void setOwnerInt(String src, String username, String group) 1488 throws AccessControlException, FileNotFoundException, SafeModeException, 1489 UnresolvedLinkException, IOException { 1490 HdfsFileStatus resultingStat = null; 1491 FSPermissionChecker pc = getPermissionChecker(); 1492 checkOperation(OperationCategory.WRITE); 1493 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 1494 writeLock(); 1495 try { 1496 checkOperation(OperationCategory.WRITE); 1497 checkNameNodeSafeMode("Cannot set owner for " + src); 1498 src = FSDirectory.resolvePath(src, pathComponents, dir); 1499 checkOwner(pc, src); 1500 if (!pc.isSuperUser()) { 1501 if (username != null && !pc.getUser().equals(username)) { 1502 throw new AccessControlException("Non-super user cannot change owner"); 1503 } 1504 if (group != null && !pc.containsGroup(group)) { 1505 throw new AccessControlException("User does not belong to " + group); 1506 } 1507 } 1508 dir.setOwner(src, username, group); 1509 resultingStat = getAuditFileInfo(src, false); 1510 } finally { 1511 writeUnlock(); 1512 } 1513 getEditLog().logSync(); 1514 logAuditEvent(true, "setOwner", src, null, resultingStat); 1515 } 1516 1517 /** 1518 * Get block locations within the specified range. 1519 * @see ClientProtocol#getBlockLocations(String, long, long) 1520 */ 1521 LocatedBlocks getBlockLocations(String clientMachine, String src, 1522 long offset, long length) throws AccessControlException, 1523 FileNotFoundException, UnresolvedLinkException, IOException { 1524 LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true, 1525 true); 1526 if (blocks != null) { 1527 blockManager.getDatanodeManager().sortLocatedBlocks( 1528 clientMachine, blocks.getLocatedBlocks()); 1529 1530 LocatedBlock lastBlock = blocks.getLastLocatedBlock(); 1531 if (lastBlock != null) { 1532 ArrayList<LocatedBlock> lastBlockList = new ArrayList<LocatedBlock>(); 1533 lastBlockList.add(lastBlock); 1534 blockManager.getDatanodeManager().sortLocatedBlocks( 1535 clientMachine, lastBlockList); 1536 } 1537 } 1538 return blocks; 1539 } 1540 1541 /** 1542 * Get block locations within the specified range. 1543 * @see ClientProtocol#getBlockLocations(String, long, long) 1544 * @throws FileNotFoundException, UnresolvedLinkException, IOException 1545 */ 1546 LocatedBlocks getBlockLocations(String src, long offset, long length, 1547 boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode) 1548 throws FileNotFoundException, UnresolvedLinkException, IOException { 1549 try { 1550 return getBlockLocationsInt(src, offset, length, doAccessTime, 1551 needBlockToken, checkSafeMode); 1552 } catch (AccessControlException e) { 1553 logAuditEvent(false, "open", src); 1554 throw e; 1555 } 1556 } 1557 1558 private LocatedBlocks getBlockLocationsInt(String src, long offset, 1559 long length, boolean doAccessTime, boolean needBlockToken, 1560 boolean checkSafeMode) 1561 throws FileNotFoundException, UnresolvedLinkException, IOException { 1562 if (offset < 0) { 1563 throw new HadoopIllegalArgumentException( 1564 "Negative offset is not supported. 
File: " + src); 1565 } 1566 if (length < 0) { 1567 throw new HadoopIllegalArgumentException( 1568 "Negative length is not supported. File: " + src); 1569 } 1570 final LocatedBlocks ret = getBlockLocationsUpdateTimes(src, 1571 offset, length, doAccessTime, needBlockToken); 1572 logAuditEvent(true, "open", src); 1573 if (checkSafeMode && isInSafeMode()) { 1574 for (LocatedBlock b : ret.getLocatedBlocks()) { 1575 // if safemode & no block locations yet then throw safemodeException 1576 if ((b.getLocations() == null) || (b.getLocations().length == 0)) { 1577 SafeModeException se = new SafeModeException( 1578 "Zero blocklocations for " + src, safeMode); 1579 if (haEnabled && haContext != null && 1580 haContext.getState().getServiceState() == HAServiceState.ACTIVE) { 1581 throw new RetriableException(se); 1582 } else { 1583 throw se; 1584 } 1585 } 1586 } 1587 } 1588 return ret; 1589 } 1590 1591 /* 1592 * Get block locations within the specified range, updating the 1593 * access times if necessary. 1594 */ 1595 private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset, 1596 long length, boolean doAccessTime, boolean needBlockToken) 1597 throws FileNotFoundException, 1598 UnresolvedLinkException, IOException { 1599 FSPermissionChecker pc = getPermissionChecker(); 1600 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 1601 for (int attempt = 0; attempt < 2; attempt++) { 1602 boolean isReadOp = (attempt == 0); 1603 if (isReadOp) { // first attempt is with readlock 1604 checkOperation(OperationCategory.READ); 1605 readLock(); 1606 } else { // second attempt is with write lock 1607 checkOperation(OperationCategory.WRITE); 1608 writeLock(); // writelock is needed to set accesstime 1609 } 1610 src = FSDirectory.resolvePath(src, pathComponents, dir); 1611 try { 1612 if (isReadOp) { 1613 checkOperation(OperationCategory.READ); 1614 } else { 1615 checkOperation(OperationCategory.WRITE); 1616 } 1617 if (isPermissionEnabled) { 1618 checkPathAccess(pc, src, FsAction.READ); 1619 } 1620 1621 // if the namenode is in safemode, then do not update access time 1622 if (isInSafeMode()) { 1623 doAccessTime = false; 1624 } 1625 1626 final INodesInPath iip = dir.getLastINodeInPath(src); 1627 final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src); 1628 if (!iip.isSnapshot() //snapshots are readonly, so don't update atime. 1629 && doAccessTime && isAccessTimeSupported()) { 1630 final long now = now(); 1631 if (now > inode.getAccessTime() + getAccessTimePrecision()) { 1632 // if we have to set access time but we only have the readlock, then 1633 // restart this entire operation with the writeLock. 1634 if (isReadOp) { 1635 continue; 1636 } 1637 dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshot()); 1638 } 1639 } 1640 final long fileSize = iip.isSnapshot() ? 1641 inode.computeFileSize(iip.getPathSnapshot()) 1642 : inode.computeFileSizeNotIncludingLastUcBlock(); 1643 boolean isUc = inode.isUnderConstruction(); 1644 if (iip.isSnapshot()) { 1645 // if src indicates a snapshot file, we need to make sure the returned 1646 // blocks do not exceed the size of the snapshot file. 1647 length = Math.min(length, fileSize - offset); 1648 isUc = false; 1649 } 1650 LocatedBlocks blocks = 1651 blockManager.createLocatedBlocks(inode.getBlocks(), fileSize, 1652 isUc, offset, length, needBlockToken, iip.isSnapshot()); 1653 // Set caching information for the located blocks. 
1654 for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1655 cacheManager.setCachedLocations(lb);
1656 }
1657 return blocks;
1658 } finally {
1659 if (isReadOp) {
1660 readUnlock();
1661 } else {
1662 writeUnlock();
1663 }
1664 }
1665 }
1666 return null; // can never reach here
1667 }
1668
1669 /**
1670 * Moves all the blocks from srcs and appends them to trg.
1671 * To avoid rollbacks we will verify the validity of ALL of the args
1672 * before we start the actual move.
1673 *
1674 * This does not support ".inodes" relative paths.
1675 * @param target the file to move the blocks into
1676 * @param srcs the source files whose blocks are moved
1677 * @throws IOException
1678 */
1679 void concat(String target, String [] srcs)
1680 throws IOException, UnresolvedLinkException {
1681 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1682 if (cacheEntry != null && cacheEntry.isSuccess()) {
1683 return; // Return previous response
1684 }
1685
1686 // Either there is no previous request in progress or it has failed
1687 if(FSNamesystem.LOG.isDebugEnabled()) {
1688 FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
1689 " to " + target);
1690 }
1691
1692 boolean success = false;
1693 try {
1694 concatInt(target, srcs, cacheEntry != null);
1695 success = true;
1696 } catch (AccessControlException e) {
1697 logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
1698 throw e;
1699 } finally {
1700 RetryCache.setState(cacheEntry, success);
1701 }
1702 }
1703
1704 private void concatInt(String target, String [] srcs,
1705 boolean logRetryCache) throws IOException, UnresolvedLinkException {
1706 // verify args
1707 if(target.isEmpty()) {
1708 throw new IllegalArgumentException("Target file name is empty");
1709 }
1710 if(srcs == null || srcs.length == 0) {
1711 throw new IllegalArgumentException("No sources given");
1712 }
1713
1714 // We require all files to be in the same directory
1715 String trgParent =
1716 target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
1717 for (String s : srcs) {
1718 String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
1719 if (!srcParent.equals(trgParent)) {
1720 throw new IllegalArgumentException(
1721 "Sources and target are not in the same directory");
1722 }
1723 }
1724
1725 HdfsFileStatus resultingStat = null;
1726 FSPermissionChecker pc = getPermissionChecker();
1727 checkOperation(OperationCategory.WRITE);
1728 writeLock();
1729 try {
1730 checkOperation(OperationCategory.WRITE);
1731 checkNameNodeSafeMode("Cannot concat " + target);
1732 concatInternal(pc, target, srcs, logRetryCache);
1733 resultingStat = getAuditFileInfo(target, false);
1734 } finally {
1735 writeUnlock();
1736 }
1737 getEditLog().logSync();
1738 logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
1739 }
1740
1741 /** See {@link #concat(String, String[])} */
1742 private void concatInternal(FSPermissionChecker pc, String target,
1743 String[] srcs, boolean logRetryCache) throws IOException,
1744 UnresolvedLinkException {
1745 assert hasWriteLock();
1746
1747 // write permission for the target
1748 if (isPermissionEnabled) {
1749 checkPathAccess(pc, target, FsAction.WRITE);
1750
1751 // and srcs
1752 for(String aSrc: srcs) {
1753 checkPathAccess(pc, aSrc, FsAction.READ); // read the file
1754 checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete
1755 }
1756 }
1757
1758 // to make sure no two files are the same
1759 Set<INode> si = new HashSet<INode>();
1760
1761 // we impose the following prerequisite for the operation:
1762 // replication and block sizes should be the same for ALL the blocks
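// Illustrative client-side call that must satisfy the prerequisite above
// (a minimal sketch; the paths are placeholders and fs is an existing FileSystem):
//   fs.concat(new Path("/warehouse/day01/part-0"),
//       new Path[] { new Path("/warehouse/day01/part-1"),
//                    new Path("/warehouse/day01/part-2") });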
1763
1764 // check the target
1765 final INodeFile trgInode = INodeFile.valueOf(dir.getINode4Write(target),
1766 target);
1767 if(trgInode.isUnderConstruction()) {
1768 throw new HadoopIllegalArgumentException("concat: target file "
1769 + target + " is under construction");
1770 }
1771 // per design the target shouldn't be empty and all of its blocks should be the same size
1772 if(trgInode.numBlocks() == 0) {
1773 throw new HadoopIllegalArgumentException("concat: target file "
1774 + target + " is empty");
1775 }
1776 if (trgInode instanceof INodeFileWithSnapshot) {
1777 throw new HadoopIllegalArgumentException("concat: target file "
1778 + target + " is in a snapshot");
1779 }
1780
1781 long blockSize = trgInode.getPreferredBlockSize();
1782
1783 // check that the end block is full
1784 final BlockInfo last = trgInode.getLastBlock();
1785 if(blockSize != last.getNumBytes()) {
1786 throw new HadoopIllegalArgumentException("The last block in " + target
1787 + " is not full; last block size = " + last.getNumBytes()
1788 + " but file block size = " + blockSize);
1789 }
1790
1791 si.add(trgInode);
1792 final short repl = trgInode.getFileReplication();
1793
1794 // now check the srcs
1795 boolean endSrc = false; // final src file doesn't have to have full end block
1796 for(int i=0; i<srcs.length; i++) {
1797 String src = srcs[i];
1798 if(i==srcs.length-1)
1799 endSrc=true;
1800
1801 final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
1802 if(src.isEmpty()
1803 || srcInode.isUnderConstruction()
1804 || srcInode.numBlocks() == 0) {
1805 throw new HadoopIllegalArgumentException("concat: source file " + src
1806 + " is invalid or empty or underConstruction");
1807 }
1808
1809 // check replication and block size
1810 if(repl != srcInode.getBlockReplication()) {
1811 throw new HadoopIllegalArgumentException("concat: the source file "
1812 + src + " and the target file " + target
1813 + " should have the same replication: source replication is "
1814 + srcInode.getBlockReplication()
1815 + " but target replication is " + repl);
1816 }
1817
1818 //boolean endBlock=false;
1819 // verify that all the blocks are of the same length as target
1820 // should be enough to check the end blocks
1821 final BlockInfo[] srcBlocks = srcInode.getBlocks();
1822 int idx = srcBlocks.length-1;
1823 if(endSrc)
1824 idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
1825 if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
1826 throw new HadoopIllegalArgumentException("concat: the source file "
1827 + src + " and the target file " + target
1828 + " should have the same block sizes: target block size is "
1829 + blockSize + " but the size of source block " + idx + " is "
1830 + srcBlocks[idx].getNumBytes());
1831 }
1832
1833 si.add(srcInode);
1834 }
1835
1836 // make sure no two files are the same
1837 if(si.size() < srcs.length+1) { // trg + srcs
1838 // it means at least two files are the same
1839 throw new HadoopIllegalArgumentException(
1840 "concat: at least two of the source files are the same");
1841 }
1842
1843 if(NameNode.stateChangeLog.isDebugEnabled()) {
1844 NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " +
1845 Arrays.toString(srcs) + " to " + target);
1846 }
1847
1848 dir.concat(target,srcs, logRetryCache);
1849 }
1850
1851 /**
1852 * Stores the modification and access time for this inode.
1853 * The access time is precise up to an hour. The transaction, if needed, is
1854 * written to the edits log but is not flushed.
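* <p>
* Illustrative usage (a minimal sketch; {@code conf} is an existing
* {@code Configuration}, the path is a placeholder, and access times must be
* enabled by setting {@code dfs.namenode.accesstime.precision} to a value
* greater than zero):
* <pre>{@code
* FileSystem fs = FileSystem.get(conf);
* long now = System.currentTimeMillis();
* fs.setTimes(new Path("/user/alice/data.txt"), now, now); // mtime, atime; -1 leaves a field unchanged
* }</pre>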
1855 */ 1856 void setTimes(String src, long mtime, long atime) 1857 throws IOException, UnresolvedLinkException { 1858 if (!isAccessTimeSupported() && atime != -1) { 1859 throw new IOException("Access time for hdfs is not configured. " + 1860 " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter."); 1861 } 1862 try { 1863 setTimesInt(src, mtime, atime); 1864 } catch (AccessControlException e) { 1865 logAuditEvent(false, "setTimes", src); 1866 throw e; 1867 } 1868 } 1869 1870 private void setTimesInt(String src, long mtime, long atime) 1871 throws IOException, UnresolvedLinkException { 1872 HdfsFileStatus resultingStat = null; 1873 FSPermissionChecker pc = getPermissionChecker(); 1874 checkOperation(OperationCategory.WRITE); 1875 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 1876 writeLock(); 1877 try { 1878 checkOperation(OperationCategory.WRITE); 1879 checkNameNodeSafeMode("Cannot set times " + src); 1880 src = FSDirectory.resolvePath(src, pathComponents, dir); 1881 1882 // Write access is required to set access and modification times 1883 if (isPermissionEnabled) { 1884 checkPathAccess(pc, src, FsAction.WRITE); 1885 } 1886 final INodesInPath iip = dir.getINodesInPath4Write(src); 1887 final INode inode = iip.getLastINode(); 1888 if (inode != null) { 1889 dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshot()); 1890 resultingStat = getAuditFileInfo(src, false); 1891 } else { 1892 throw new FileNotFoundException("File/Directory " + src + " does not exist."); 1893 } 1894 } finally { 1895 writeUnlock(); 1896 } 1897 logAuditEvent(true, "setTimes", src, null, resultingStat); 1898 } 1899 1900 /** 1901 * Create a symbolic link. 1902 */ 1903 @SuppressWarnings("deprecation") 1904 void createSymlink(String target, String link, 1905 PermissionStatus dirPerms, boolean createParent) 1906 throws IOException, UnresolvedLinkException { 1907 if (!FileSystem.areSymlinksEnabled()) { 1908 throw new UnsupportedOperationException("Symlinks not supported"); 1909 } 1910 if (!DFSUtil.isValidName(link)) { 1911 throw new InvalidPathException("Invalid link name: " + link); 1912 } 1913 if (FSDirectory.isReservedName(target)) { 1914 throw new InvalidPathException("Invalid target name: " + target); 1915 } 1916 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 1917 if (cacheEntry != null && cacheEntry.isSuccess()) { 1918 return; // Return previous response 1919 } 1920 boolean success = false; 1921 try { 1922 createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null); 1923 success = true; 1924 } catch (AccessControlException e) { 1925 logAuditEvent(false, "createSymlink", link, target, null); 1926 throw e; 1927 } finally { 1928 RetryCache.setState(cacheEntry, success); 1929 } 1930 } 1931 1932 private void createSymlinkInt(String target, String link, 1933 PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) 1934 throws IOException, UnresolvedLinkException { 1935 if (NameNode.stateChangeLog.isDebugEnabled()) { 1936 NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target=" 1937 + target + " link=" + link); 1938 } 1939 HdfsFileStatus resultingStat = null; 1940 FSPermissionChecker pc = getPermissionChecker(); 1941 checkOperation(OperationCategory.WRITE); 1942 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link); 1943 writeLock(); 1944 try { 1945 checkOperation(OperationCategory.WRITE); 1946 checkNameNodeSafeMode("Cannot create symlink " + link); 1947 link = 
FSDirectory.resolvePath(link, pathComponents, dir); 1948 if (!createParent) { 1949 verifyParentDir(link); 1950 } 1951 if (!dir.isValidToCreate(link)) { 1952 throw new IOException("failed to create link " + link 1953 +" either because the filename is invalid or the file exists"); 1954 } 1955 if (isPermissionEnabled) { 1956 checkAncestorAccess(pc, link, FsAction.WRITE); 1957 } 1958 // validate that we have enough inodes. 1959 checkFsObjectLimit(); 1960 1961 // add symbolic link to namespace 1962 dir.addSymlink(link, target, dirPerms, createParent, logRetryCache); 1963 resultingStat = getAuditFileInfo(link, false); 1964 } finally { 1965 writeUnlock(); 1966 } 1967 getEditLog().logSync(); 1968 logAuditEvent(true, "createSymlink", link, target, resultingStat); 1969 } 1970 1971 /** 1972 * Set replication for an existing file. 1973 * 1974 * The NameNode sets new replication and schedules either replication of 1975 * under-replicated data blocks or removal of the excessive block copies 1976 * if the blocks are over-replicated. 1977 * 1978 * @see ClientProtocol#setReplication(String, short) 1979 * @param src file name 1980 * @param replication new replication 1981 * @return true if successful; 1982 * false if file does not exist or is a directory 1983 */ 1984 boolean setReplication(final String src, final short replication) 1985 throws IOException { 1986 try { 1987 return setReplicationInt(src, replication); 1988 } catch (AccessControlException e) { 1989 logAuditEvent(false, "setReplication", src); 1990 throw e; 1991 } 1992 } 1993 1994 private boolean setReplicationInt(String src, final short replication) 1995 throws IOException { 1996 blockManager.verifyReplication(src, replication, null); 1997 final boolean isFile; 1998 FSPermissionChecker pc = getPermissionChecker(); 1999 checkOperation(OperationCategory.WRITE); 2000 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2001 writeLock(); 2002 try { 2003 checkOperation(OperationCategory.WRITE); 2004 checkNameNodeSafeMode("Cannot set replication for " + src); 2005 src = FSDirectory.resolvePath(src, pathComponents, dir); 2006 if (isPermissionEnabled) { 2007 checkPathAccess(pc, src, FsAction.WRITE); 2008 } 2009 2010 final short[] blockRepls = new short[2]; // 0: old, 1: new 2011 final Block[] blocks = dir.setReplication(src, replication, blockRepls); 2012 isFile = blocks != null; 2013 if (isFile) { 2014 blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks); 2015 } 2016 } finally { 2017 writeUnlock(); 2018 } 2019 2020 getEditLog().logSync(); 2021 if (isFile) { 2022 logAuditEvent(true, "setReplication", src); 2023 } 2024 return isFile; 2025 } 2026 2027 long getPreferredBlockSize(String filename) 2028 throws IOException, UnresolvedLinkException { 2029 FSPermissionChecker pc = getPermissionChecker(); 2030 checkOperation(OperationCategory.READ); 2031 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename); 2032 readLock(); 2033 try { 2034 checkOperation(OperationCategory.READ); 2035 filename = FSDirectory.resolvePath(filename, pathComponents, dir); 2036 if (isPermissionEnabled) { 2037 checkTraverse(pc, filename); 2038 } 2039 return dir.getPreferredBlockSize(filename); 2040 } finally { 2041 readUnlock(); 2042 } 2043 } 2044 2045 /** 2046 * Verify that parent directory of src exists. 
2047 */ 2048 private void verifyParentDir(String src) throws FileNotFoundException, 2049 ParentNotDirectoryException, UnresolvedLinkException { 2050 assert hasReadLock(); 2051 Path parent = new Path(src).getParent(); 2052 if (parent != null) { 2053 final INode parentNode = dir.getINode(parent.toString()); 2054 if (parentNode == null) { 2055 throw new FileNotFoundException("Parent directory doesn't exist: " 2056 + parent); 2057 } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) { 2058 throw new ParentNotDirectoryException("Parent path is not a directory: " 2059 + parent); 2060 } 2061 } 2062 } 2063 2064 /** 2065 * Create a new file entry in the namespace. 2066 * 2067 * For description of parameters and exceptions thrown see 2068 * {@link ClientProtocol#create()}, except it returns valid file status upon 2069 * success 2070 * 2071 * For retryCache handling details see - 2072 * {@link #getFileStatus(boolean, CacheEntryWithPayload)} 2073 * 2074 */ 2075 HdfsFileStatus startFile(String src, PermissionStatus permissions, 2076 String holder, String clientMachine, EnumSet<CreateFlag> flag, 2077 boolean createParent, short replication, long blockSize) 2078 throws AccessControlException, SafeModeException, 2079 FileAlreadyExistsException, UnresolvedLinkException, 2080 FileNotFoundException, ParentNotDirectoryException, IOException { 2081 HdfsFileStatus status = null; 2082 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, 2083 null); 2084 if (cacheEntry != null && cacheEntry.isSuccess()) { 2085 return (HdfsFileStatus) cacheEntry.getPayload(); 2086 } 2087 2088 try { 2089 status = startFileInt(src, permissions, holder, clientMachine, flag, 2090 createParent, replication, blockSize, cacheEntry != null); 2091 } catch (AccessControlException e) { 2092 logAuditEvent(false, "create", src); 2093 throw e; 2094 } finally { 2095 RetryCache.setState(cacheEntry, status != null, status); 2096 } 2097 return status; 2098 } 2099 2100 private HdfsFileStatus startFileInt(String src, PermissionStatus permissions, 2101 String holder, String clientMachine, EnumSet<CreateFlag> flag, 2102 boolean createParent, short replication, long blockSize, 2103 boolean logRetryCache) throws AccessControlException, SafeModeException, 2104 FileAlreadyExistsException, UnresolvedLinkException, 2105 FileNotFoundException, ParentNotDirectoryException, IOException { 2106 if (NameNode.stateChangeLog.isDebugEnabled()) { 2107 NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src 2108 + ", holder=" + holder 2109 + ", clientMachine=" + clientMachine 2110 + ", createParent=" + createParent 2111 + ", replication=" + replication 2112 + ", createFlag=" + flag.toString()); 2113 } 2114 if (!DFSUtil.isValidName(src)) { 2115 throw new InvalidPathException(src); 2116 } 2117 blockManager.verifyReplication(src, replication, clientMachine); 2118 2119 boolean skipSync = false; 2120 HdfsFileStatus stat = null; 2121 FSPermissionChecker pc = getPermissionChecker(); 2122 checkOperation(OperationCategory.WRITE); 2123 if (blockSize < minBlockSize) { 2124 throw new IOException("Specified block size is less than configured" + 2125 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY 2126 + "): " + blockSize + " < " + minBlockSize); 2127 } 2128 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2129 boolean create = flag.contains(CreateFlag.CREATE); 2130 boolean overwrite = flag.contains(CreateFlag.OVERWRITE); 2131 writeLock(); 2132 try { 2133 
checkOperation(OperationCategory.WRITE);
2134 checkNameNodeSafeMode("Cannot create file " + src);
2135 src = FSDirectory.resolvePath(src, pathComponents, dir);
2136 startFileInternal(pc, src, permissions, holder, clientMachine, create,
2137 overwrite, createParent, replication, blockSize, logRetryCache);
2138 stat = dir.getFileInfo(src, false);
2139 } catch (StandbyException se) {
2140 skipSync = true;
2141 throw se;
2142 } finally {
2143 writeUnlock();
2144 // There might be transactions logged while trying to recover the lease.
2145 // They need to be sync'ed even when an exception was thrown.
2146 if (!skipSync) {
2147 getEditLog().logSync();
2148 }
2149 }
2150 logAuditEvent(true, "create", src, null, stat);
2151 return stat;
2152 }
2153
2154 /**
2155 * Create a new file or overwrite an existing file<br>
2156 *
2157 * Once the file is created, the client then allocates a new block with the next
2158 * call using {@link NameNode#addBlock()}.
2159 * <p>
2160 * For description of parameters and exceptions thrown see
2161 * {@link ClientProtocol#create()}
2162 */
2163 private void startFileInternal(FSPermissionChecker pc, String src,
2164 PermissionStatus permissions, String holder, String clientMachine,
2165 boolean create, boolean overwrite, boolean createParent,
2166 short replication, long blockSize, boolean logRetryEntry)
2167 throws FileAlreadyExistsException, AccessControlException,
2168 UnresolvedLinkException, FileNotFoundException,
2169 ParentNotDirectoryException, IOException {
2170 assert hasWriteLock();
2171 // Verify that the destination does not exist as a directory already.
2172 final INodesInPath iip = dir.getINodesInPath4Write(src);
2173 final INode inode = iip.getLastINode();
2174 if (inode != null && inode.isDirectory()) {
2175 throw new FileAlreadyExistsException("Cannot create file " + src
2176 + "; already exists as a directory.");
2177 }
2178 final INodeFile myFile = INodeFile.valueOf(inode, src, true);
2179 if (isPermissionEnabled) {
2180 if (overwrite && myFile != null) {
2181 checkPathAccess(pc, src, FsAction.WRITE);
2182 } else {
2183 checkAncestorAccess(pc, src, FsAction.WRITE);
2184 }
2185 }
2186
2187 if (!createParent) {
2188 verifyParentDir(src);
2189 }
2190
2191 try {
2192 if (myFile == null) {
2193 if (!create) {
2194 throw new FileNotFoundException("failed to overwrite non-existent file "
2195 + src + " on client " + clientMachine);
2196 }
2197 } else {
2198 if (overwrite) {
2199 try {
2200 deleteInt(src, true, false); // File exists - delete if overwrite
2201 } catch (AccessControlException e) {
2202 logAuditEvent(false, "delete", src);
2203 throw e;
2204 }
2205 } else {
2206 // If lease soft limit time is expired, recover the lease
2207 recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2208 throw new FileAlreadyExistsException("failed to create file " + src
2209 + " on client " + clientMachine + " because the file exists");
2210 }
2211 }
2212
2213 checkFsObjectLimit();
2214 final DatanodeDescriptor clientNode =
2215 blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2216
2217 INodeFileUnderConstruction newNode = dir.addFile(src, permissions,
2218 replication, blockSize, holder, clientMachine, clientNode);
2219 if (newNode == null) {
2220 throw new IOException("DIR* NameSystem.startFile: " +
2221 "Unable to add file to namespace.");
2222 }
2223 leaseManager.addLease(newNode.getClientName(), src);
2224
2225 // record the new file in the edit log, along with the new generation stamp
2226 getEditLog().logOpenFile(src, newNode, logRetryEntry); 2227
if (NameNode.stateChangeLog.isDebugEnabled()) {
2228 NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
2229 +"add "+src+" to namespace for "+holder);
2230 }
2231 } catch (IOException ie) {
2232 NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: "
2233 +ie.getMessage());
2234 throw ie;
2235 }
2236 }
2237
2238 /**
2239 * Append to an existing file.
2240 * <p>
2241 *
2242 * The method returns the last block of the file if this is a partial block,
2243 * which can still be used for writing more data. The client uses the returned
2244 * block locations to form the data pipeline for this block.<br>
2245 * The method returns null if the last block is full. The client then
2246 * allocates a new block with the next call using {@link NameNode#addBlock()}.
2247 * <p>
2248 *
2249 * For description of parameters and exceptions thrown see
2250 * {@link ClientProtocol#append(String, String)}
2251 *
2252 * @return the last block locations if the block is partial or null otherwise
2253 */
2254 private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
2255 String holder, String clientMachine, boolean logRetryCache)
2256 throws AccessControlException, UnresolvedLinkException,
2257 FileNotFoundException, IOException {
2258 assert hasWriteLock();
2259 // Verify that the destination does not exist as a directory already.
2260 final INodesInPath iip = dir.getINodesInPath4Write(src);
2261 final INode inode = iip.getLastINode();
2262 if (inode != null && inode.isDirectory()) {
2263 throw new FileAlreadyExistsException("Cannot append to directory " + src
2264 + "; already exists as a directory.");
2265 }
2266 if (isPermissionEnabled) {
2267 checkPathAccess(pc, src, FsAction.WRITE);
2268 }
2269
2270 try {
2271 if (inode == null) {
2272 throw new FileNotFoundException("failed to append to non-existent file "
2273 + src + " on client " + clientMachine);
2274 }
2275 INodeFile myFile = INodeFile.valueOf(inode, src, true);
2276 // Opening an existing file for write - may need to recover lease.
2277 recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2278
2279 // recoverLeaseInternal may create a new INodeFile via
2280 // finalizeINodeFileUnderConstruction so we need to refresh
2281 // the referenced file.
2282 myFile = INodeFile.valueOf(dir.getINode(src), src, true);
2283
2284 final DatanodeDescriptor clientNode =
2285 blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2286 return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode,
2287 true, iip.getLatestSnapshot(), logRetryCache);
2288 } catch (IOException ie) {
2289 NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
2290 throw ie;
2291 }
2292 }
2293
2294 /**
2295 * Replace current node with an INodeFileUnderConstruction.
2296 * Recreate in-memory lease record.
2297 * 2298 * @param src path to the file 2299 * @param file existing file object 2300 * @param leaseHolder identifier of the lease holder on this file 2301 * @param clientMachine identifier of the client machine 2302 * @param clientNode if the client is collocated with a DN, that DN's descriptor 2303 * @param writeToEditLog whether to persist this change to the edit log 2304 * @param logRetryCache whether to record RPC ids in editlog for retry cache 2305 * rebuilding 2306 * @return the last block locations if the block is partial or null otherwise 2307 * @throws UnresolvedLinkException 2308 * @throws IOException 2309 */ 2310 LocatedBlock prepareFileForWrite(String src, INodeFile file, 2311 String leaseHolder, String clientMachine, DatanodeDescriptor clientNode, 2312 boolean writeToEditLog, Snapshot latestSnapshot, boolean logRetryCache) 2313 throws IOException { 2314 file = file.recordModification(latestSnapshot, dir.getINodeMap()); 2315 final INodeFileUnderConstruction cons = file.toUnderConstruction( 2316 leaseHolder, clientMachine, clientNode); 2317 2318 dir.replaceINodeFile(src, file, cons); 2319 leaseManager.addLease(cons.getClientName(), src); 2320 2321 LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons); 2322 if (writeToEditLog) { 2323 getEditLog().logOpenFile(src, cons, logRetryCache); 2324 } 2325 return ret; 2326 } 2327 2328 /** 2329 * Recover lease; 2330 * Immediately revoke the lease of the current lease holder and start lease 2331 * recovery so that the file can be forced to be closed. 2332 * 2333 * @param src the path of the file to start lease recovery 2334 * @param holder the lease holder's name 2335 * @param clientMachine the client machine's name 2336 * @return true if the file is already closed 2337 * @throws IOException 2338 */ 2339 boolean recoverLease(String src, String holder, String clientMachine) 2340 throws IOException { 2341 if (!DFSUtil.isValidName(src)) { 2342 throw new IOException("Invalid file name: " + src); 2343 } 2344 2345 boolean skipSync = false; 2346 FSPermissionChecker pc = getPermissionChecker(); 2347 checkOperation(OperationCategory.WRITE); 2348 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2349 writeLock(); 2350 try { 2351 checkOperation(OperationCategory.WRITE); 2352 checkNameNodeSafeMode("Cannot recover the lease of " + src); 2353 src = FSDirectory.resolvePath(src, pathComponents, dir); 2354 final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src); 2355 if (!inode.isUnderConstruction()) { 2356 return true; 2357 } 2358 if (isPermissionEnabled) { 2359 checkPathAccess(pc, src, FsAction.WRITE); 2360 } 2361 2362 recoverLeaseInternal(inode, src, holder, clientMachine, true); 2363 } catch (StandbyException se) { 2364 skipSync = true; 2365 throw se; 2366 } finally { 2367 writeUnlock(); 2368 // There might be transactions logged while trying to recover the lease. 2369 // They need to be sync'ed even when an exception was thrown. 2370 if (!skipSync) { 2371 getEditLog().logSync(); 2372 } 2373 } 2374 return false; 2375 } 2376 2377 private void recoverLeaseInternal(INodeFile fileInode, 2378 String src, String holder, String clientMachine, boolean force) 2379 throws IOException { 2380 assert hasWriteLock(); 2381 if (fileInode != null && fileInode.isUnderConstruction()) { 2382 INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction) fileInode; 2383 // 2384 // If the file is under construction , then it must be in our 2385 // leases. Find the appropriate lease record. 
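// Note: force is true only on the explicit recoverLease() RPC path, which
// releases the lease immediately; create and append call this with force=false,
// and then recovery is attempted only after the holder's soft limit has expired.
// A client can trigger the forced path with, for example (an illustrative
// sketch; the cast and path are placeholders):
//   ((DistributedFileSystem) fs).recoverLease(new Path("/logs/app.log"));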
2386 // 2387 Lease lease = leaseManager.getLease(holder); 2388 // 2389 // We found the lease for this file. And surprisingly the original 2390 // holder is trying to recreate this file. This should never occur. 2391 // 2392 if (!force && lease != null) { 2393 Lease leaseFile = leaseManager.getLeaseByPath(src); 2394 if ((leaseFile != null && leaseFile.equals(lease)) || 2395 lease.getHolder().equals(holder)) { 2396 throw new AlreadyBeingCreatedException( 2397 "failed to create file " + src + " for " + holder + 2398 " on client " + clientMachine + 2399 " because current leaseholder is trying to recreate file."); 2400 } 2401 } 2402 // 2403 // Find the original holder. 2404 // 2405 lease = leaseManager.getLease(pendingFile.getClientName()); 2406 if (lease == null) { 2407 throw new AlreadyBeingCreatedException( 2408 "failed to create file " + src + " for " + holder + 2409 " on client " + clientMachine + 2410 " because pendingCreates is non-null but no leases found."); 2411 } 2412 if (force) { 2413 // close now: no need to wait for soft lease expiration and 2414 // close only the file src 2415 LOG.info("recoverLease: " + lease + ", src=" + src + 2416 " from client " + pendingFile.getClientName()); 2417 internalReleaseLease(lease, src, holder); 2418 } else { 2419 assert lease.getHolder().equals(pendingFile.getClientName()) : 2420 "Current lease holder " + lease.getHolder() + 2421 " does not match file creator " + pendingFile.getClientName(); 2422 // 2423 // If the original holder has not renewed in the last SOFTLIMIT 2424 // period, then start lease recovery. 2425 // 2426 if (lease.expiredSoftLimit()) { 2427 LOG.info("startFile: recover " + lease + ", src=" + src + " client " 2428 + pendingFile.getClientName()); 2429 boolean isClosed = internalReleaseLease(lease, src, null); 2430 if(!isClosed) 2431 throw new RecoveryInProgressException( 2432 "Failed to close file " + src + 2433 ". Lease recovery is in progress. Try again later."); 2434 } else { 2435 final BlockInfo lastBlock = pendingFile.getLastBlock(); 2436 if (lastBlock != null 2437 && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) { 2438 throw new RecoveryInProgressException("Recovery in progress, file [" 2439 + src + "], " + "lease owner [" + lease.getHolder() + "]"); 2440 } else { 2441 throw new AlreadyBeingCreatedException("Failed to create file [" 2442 + src + "] for [" + holder + "] on client [" + clientMachine 2443 + "], because this file is already being created by [" 2444 + pendingFile.getClientName() + "] on [" 2445 + pendingFile.getClientMachine() + "]"); 2446 } 2447 } 2448 } 2449 } 2450 } 2451 2452 /** 2453 * Append to an existing file in the namespace. 
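* <p>
* Illustrative client-side usage (a minimal sketch; {@code conf} is an existing
* {@code Configuration}, the path is a placeholder, and the cluster must have
* {@code dfs.support.append} enabled, as checked in appendFileInt below):
* <pre>{@code
* FileSystem fs = FileSystem.get(conf);
* FSDataOutputStream out = fs.append(new Path("/logs/app.log"));
* out.writeBytes("one more line\n");
* out.close();
* }</pre>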
2454 */
2455 LocatedBlock appendFile(String src, String holder, String clientMachine)
2456 throws AccessControlException, SafeModeException,
2457 FileAlreadyExistsException, FileNotFoundException,
2458 ParentNotDirectoryException, IOException {
2459 LocatedBlock lb = null;
2460 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2461 null);
2462 if (cacheEntry != null && cacheEntry.isSuccess()) {
2463 return (LocatedBlock) cacheEntry.getPayload();
2464 }
2465
2466 boolean success = false;
2467 try {
2468 lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
2469 success = true;
2470 return lb;
2471 } catch (AccessControlException e) {
2472 logAuditEvent(false, "append", src);
2473 throw e;
2474 } finally {
2475 RetryCache.setState(cacheEntry, success, lb);
2476 }
2477 }
2478
2479 private LocatedBlock appendFileInt(String src, String holder,
2480 String clientMachine, boolean logRetryCache)
2481 throws AccessControlException, SafeModeException,
2482 FileAlreadyExistsException, FileNotFoundException,
2483 ParentNotDirectoryException, IOException {
2484 if (NameNode.stateChangeLog.isDebugEnabled()) {
2485 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
2486 + ", holder=" + holder
2487 + ", clientMachine=" + clientMachine);
2488 }
2489 boolean skipSync = false;
2490 if (!supportAppends) {
2491 throw new UnsupportedOperationException(
2492 "Append is not enabled on this NameNode. Use the " +
2493 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
2494 }
2495
2496 LocatedBlock lb = null;
2497 FSPermissionChecker pc = getPermissionChecker();
2498 checkOperation(OperationCategory.WRITE);
2499 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2500 writeLock();
2501 try {
2502 checkOperation(OperationCategory.WRITE);
2503 checkNameNodeSafeMode("Cannot append to file " + src);
2504 src = FSDirectory.resolvePath(src, pathComponents, dir);
2505 lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
2506 } catch (StandbyException se) {
2507 skipSync = true;
2508 throw se;
2509 } finally {
2510 writeUnlock();
2511 // There might be transactions logged while trying to recover the lease.
2512 // They need to be sync'ed even when an exception was thrown.
2513 if (!skipSync) {
2514 getEditLog().logSync();
2515 }
2516 }
2517 if (lb != null) {
2518 if (NameNode.stateChangeLog.isDebugEnabled()) {
2519 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
2520 +src+" for "+holder+" at "+clientMachine
2521 +" block " + lb.getBlock()
2522 +" block size " + lb.getBlock().getNumBytes());
2523 }
2524 }
2525 logAuditEvent(true, "append", src);
2526 return lb;
2527 }
2528
2529 ExtendedBlock getExtendedBlock(Block blk) {
2530 return new ExtendedBlock(blockPoolId, blk);
2531 }
2532
2533 void setBlockPoolId(String bpid) {
2534 blockPoolId = bpid;
2535 blockManager.setBlockPoolId(blockPoolId);
2536 }
2537
2538 /**
2539 * The client would like to obtain an additional block for the indicated
2540 * filename (which is being written to). Return an array that consists
2541 * of the block, plus a set of machines. The first on this list should
2542 * be where the client writes data. Subsequent items in the list must
2543 * be provided in the connection to the first datanode.
2544 *
2545 * Make sure the previous blocks have been reported by datanodes and
2546 * are replicated. Will return an empty 2-elt array if we want the
2547 * client to "try again later".
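* <p>
* This is the NameNode side of the client write pipeline: each time the output
* stream fills a block, the client asks for the next one via
* {@code ClientProtocol#addBlock}, which lands here. Illustrative client-side
* view (a minimal sketch; {@code fs} is an existing {@code FileSystem} and the
* path is a placeholder; block allocation happens transparently in the stream):
* <pre>{@code
* byte[] buf = new byte[8 * 1024 * 1024];               // placeholder payload
* FSDataOutputStream out = fs.create(new Path("/user/alice/big.bin"));
* for (int i = 0; i < 128; i++) {
*   out.write(buf);  // crossing a block boundary triggers getAdditionalBlock()
* }
* out.close();
* }</pre>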
2548 */ 2549 LocatedBlock getAdditionalBlock(String src, long fileId, String clientName, 2550 ExtendedBlock previous, Set<Node> excludedNodes, 2551 List<String> favoredNodes) 2552 throws LeaseExpiredException, NotReplicatedYetException, 2553 QuotaExceededException, SafeModeException, UnresolvedLinkException, 2554 IOException { 2555 long blockSize; 2556 int replication; 2557 DatanodeDescriptor clientNode = null; 2558 2559 if(NameNode.stateChangeLog.isDebugEnabled()) { 2560 NameNode.stateChangeLog.debug( 2561 "BLOCK* NameSystem.getAdditionalBlock: file " 2562 +src+" for "+clientName); 2563 } 2564 2565 // Part I. Analyze the state of the file with respect to the input data. 2566 checkOperation(OperationCategory.READ); 2567 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2568 readLock(); 2569 try { 2570 checkOperation(OperationCategory.READ); 2571 src = FSDirectory.resolvePath(src, pathComponents, dir); 2572 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 2573 final INode[] inodes = analyzeFileState( 2574 src, fileId, clientName, previous, onRetryBlock).getINodes(); 2575 final INodeFileUnderConstruction pendingFile = 2576 (INodeFileUnderConstruction) inodes[inodes.length - 1].asFile(); 2577 2578 if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) { 2579 // This is a retry. Just return the last block if having locations. 2580 return onRetryBlock[0]; 2581 } 2582 if (pendingFile.getBlocks().length >= maxBlocksPerFile) { 2583 throw new IOException("File has reached the limit on maximum number of" 2584 + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY 2585 + "): " + pendingFile.getBlocks().length + " >= " 2586 + maxBlocksPerFile); 2587 } 2588 blockSize = pendingFile.getPreferredBlockSize(); 2589 clientNode = pendingFile.getClientNode(); 2590 replication = pendingFile.getFileReplication(); 2591 } finally { 2592 readUnlock(); 2593 } 2594 2595 // choose targets for the new block to be allocated. 2596 final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget( 2597 src, replication, clientNode, excludedNodes, blockSize, favoredNodes); 2598 2599 // Part II. 2600 // Allocate a new block, add it to the INode and the BlocksMap. 2601 Block newBlock = null; 2602 long offset; 2603 checkOperation(OperationCategory.WRITE); 2604 writeLock(); 2605 try { 2606 checkOperation(OperationCategory.WRITE); 2607 // Run the full analysis again, since things could have changed 2608 // while chooseTarget() was executing. 2609 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 2610 INodesInPath inodesInPath = 2611 analyzeFileState(src, fileId, clientName, previous, onRetryBlock); 2612 final INode[] inodes = inodesInPath.getINodes(); 2613 final INodeFileUnderConstruction pendingFile = 2614 (INodeFileUnderConstruction) inodes[inodes.length - 1].asFile(); 2615 2616 if (onRetryBlock[0] != null) { 2617 if (onRetryBlock[0].getLocations().length > 0) { 2618 // This is a retry. Just return the last block if having locations. 
2619 return onRetryBlock[0]; 2620 } else { 2621 // add new chosen targets to already allocated block and return 2622 BlockInfo lastBlockInFile = pendingFile.getLastBlock(); 2623 ((BlockInfoUnderConstruction) lastBlockInFile) 2624 .setExpectedLocations(targets); 2625 offset = pendingFile.computeFileSize(); 2626 return makeLocatedBlock(lastBlockInFile, targets, offset); 2627 } 2628 } 2629 2630 // commit the last block and complete it if it has minimum replicas 2631 commitOrCompleteLastBlock(pendingFile, 2632 ExtendedBlock.getLocalBlock(previous)); 2633 2634 // allocate new block, record block locations in INode. 2635 newBlock = createNewBlock(); 2636 saveAllocatedBlock(src, inodesInPath, newBlock, targets); 2637 2638 dir.persistNewBlock(src, pendingFile); 2639 offset = pendingFile.computeFileSize(); 2640 } finally { 2641 writeUnlock(); 2642 } 2643 getEditLog().logSync(); 2644 2645 // Return located block 2646 return makeLocatedBlock(newBlock, targets, offset); 2647 } 2648 2649 INodesInPath analyzeFileState(String src, 2650 long fileId, 2651 String clientName, 2652 ExtendedBlock previous, 2653 LocatedBlock[] onRetryBlock) 2654 throws IOException { 2655 assert hasReadLock(); 2656 2657 checkBlock(previous); 2658 onRetryBlock[0] = null; 2659 checkOperation(OperationCategory.WRITE); 2660 checkNameNodeSafeMode("Cannot add block to " + src); 2661 2662 // have we exceeded the configured limit of fs objects. 2663 checkFsObjectLimit(); 2664 2665 Block previousBlock = ExtendedBlock.getLocalBlock(previous); 2666 final INodesInPath iip = dir.getINodesInPath4Write(src); 2667 final INodeFileUnderConstruction pendingFile 2668 = checkLease(src, fileId, clientName, iip.getLastINode()); 2669 BlockInfo lastBlockInFile = pendingFile.getLastBlock(); 2670 if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) { 2671 // The block that the client claims is the current last block 2672 // doesn't match up with what we think is the last block. There are 2673 // four possibilities: 2674 // 1) This is the first block allocation of an append() pipeline 2675 // which started appending exactly at a block boundary. 2676 // In this case, the client isn't passed the previous block, 2677 // so it makes the allocateBlock() call with previous=null. 2678 // We can distinguish this since the last block of the file 2679 // will be exactly a full block. 2680 // 2) This is a retry from a client that missed the response of a 2681 // prior getAdditionalBlock() call, perhaps because of a network 2682 // timeout, or because of an HA failover. In that case, we know 2683 // by the fact that the client is re-issuing the RPC that it 2684 // never began to write to the old block. Hence it is safe to 2685 // to return the existing block. 2686 // 3) This is an entirely bogus request/bug -- we should error out 2687 // rather than potentially appending a new block with an empty 2688 // one in the middle, etc 2689 // 4) This is a retry from a client that timed out while 2690 // the prior getAdditionalBlock() is still being processed, 2691 // currently working on chooseTarget(). 2692 // There are no means to distinguish between the first and 2693 // the second attempts in Part I, because the first one hasn't 2694 // changed the namesystem state yet. 2695 // We run this analysis again in Part II where case 4 is impossible. 
2696 2697 BlockInfo penultimateBlock = pendingFile.getPenultimateBlock(); 2698 if (previous == null && 2699 lastBlockInFile != null && 2700 lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() && 2701 lastBlockInFile.isComplete()) { 2702 // Case 1 2703 if (NameNode.stateChangeLog.isDebugEnabled()) { 2704 NameNode.stateChangeLog.debug( 2705 "BLOCK* NameSystem.allocateBlock: handling block allocation" + 2706 " writing to a file with a complete previous block: src=" + 2707 src + " lastBlock=" + lastBlockInFile); 2708 } 2709 } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) { 2710 if (lastBlockInFile.getNumBytes() != 0) { 2711 throw new IOException( 2712 "Request looked like a retry to allocate block " + 2713 lastBlockInFile + " but it already contains " + 2714 lastBlockInFile.getNumBytes() + " bytes"); 2715 } 2716 2717 // Case 2 2718 // Return the last block. 2719 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + 2720 "caught retry for allocation of a new block in " + 2721 src + ". Returning previously allocated block " + lastBlockInFile); 2722 long offset = pendingFile.computeFileSize(); 2723 onRetryBlock[0] = makeLocatedBlock(lastBlockInFile, 2724 ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(), 2725 offset); 2726 return iip; 2727 } else { 2728 // Case 3 2729 throw new IOException("Cannot allocate block in " + src + ": " + 2730 "passed 'previous' block " + previous + " does not match actual " + 2731 "last block in file " + lastBlockInFile); 2732 } 2733 } 2734 2735 // Check if the penultimate block is minimally replicated 2736 if (!checkFileProgress(pendingFile, false)) { 2737 throw new NotReplicatedYetException("Not replicated yet: " + src); 2738 } 2739 return iip; 2740 } 2741 2742 LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs, 2743 long offset) throws IOException { 2744 LocatedBlock lBlk = new LocatedBlock( 2745 getExtendedBlock(blk), locs, offset, false); 2746 getBlockManager().setBlockToken( 2747 lBlk, BlockTokenSecretManager.AccessMode.WRITE); 2748 return lBlk; 2749 } 2750 2751 /** @see NameNode#getAdditionalDatanode(String, ExtendedBlock, DatanodeInfo[], DatanodeInfo[], int, String) */ 2752 LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk, 2753 final DatanodeInfo[] existings, final String[] storageIDs, 2754 final Set<Node> excludes, 2755 final int numAdditionalNodes, final String clientName 2756 ) throws IOException { 2757 //check if the feature is enabled 2758 dtpReplaceDatanodeOnFailure.checkEnabled(); 2759 2760 final DatanodeDescriptor clientnode; 2761 final long preferredblocksize; 2762 final List<DatanodeStorageInfo> chosen; 2763 checkOperation(OperationCategory.READ); 2764 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2765 readLock(); 2766 try { 2767 checkOperation(OperationCategory.READ); 2768 //check safe mode 2769 checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk); 2770 src = FSDirectory.resolvePath(src, pathComponents, dir); 2771 2772 //check lease 2773 final INodeFileUnderConstruction file = checkLease(src, clientName); 2774 clientnode = file.getClientNode(); 2775 preferredblocksize = file.getPreferredBlockSize(); 2776 2777 //find datanode storages 2778 final DatanodeManager dm = blockManager.getDatanodeManager(); 2779 chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs)); 2780 } finally { 2781 readUnlock(); 2782 } 2783 2784 // choose new datanodes. 
2785 final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy(
2786 ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true,
2787 // TODO: get storage type from the file
2788 excludes, preferredblocksize, StorageType.DEFAULT);
2789 final LocatedBlock lb = new LocatedBlock(blk, targets);
2790 blockManager.setBlockToken(lb, AccessMode.COPY);
2791 return lb;
2792 }
2793
2794 /**
2795 * The client would like to let go of the given block.
2796 */
2797 boolean abandonBlock(ExtendedBlock b, String src, String holder)
2798 throws LeaseExpiredException, FileNotFoundException,
2799 UnresolvedLinkException, IOException {
2800 if(NameNode.stateChangeLog.isDebugEnabled()) {
2801 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
2802 + " of file " + src);
2803 }
2804 checkOperation(OperationCategory.WRITE);
2805 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2806 writeLock();
2807 try {
2808 checkOperation(OperationCategory.WRITE);
2809 checkNameNodeSafeMode("Cannot abandon block " + b + " for file " + src);
2810 src = FSDirectory.resolvePath(src, pathComponents, dir);
2811
2812 //
2813 // Remove the block from the pending creates list
2814 //
2815 INodeFileUnderConstruction file = checkLease(src, holder);
2816 boolean removed = dir.removeBlock(src, file,
2817 ExtendedBlock.getLocalBlock(b));
2818 if (!removed) {
2819 return true;
2820 }
2821 if(NameNode.stateChangeLog.isDebugEnabled()) {
2822 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
2823 + b + " is removed from pendingCreates");
2824 }
2825 dir.persistBlocks(src, file, false);
2826 } finally {
2827 writeUnlock();
2828 }
2829 getEditLog().logSync();
2830
2831 return true;
2832 }
2833
2834 /** Make sure that we still have the lease on this file. */
2835 private INodeFileUnderConstruction checkLease(String src, String holder)
2836 throws LeaseExpiredException, UnresolvedLinkException,
2837 FileNotFoundException {
2838 return checkLease(src, INodeId.GRANDFATHER_INODE_ID, holder,
2839 dir.getINode(src));
2840 }
2841
2842 private INodeFileUnderConstruction checkLease(String src, long fileId,
2843 String holder, INode inode) throws LeaseExpiredException,
2844 FileNotFoundException {
2845 assert hasReadLock();
2846 if (inode == null || !inode.isFile()) {
2847 Lease lease = leaseManager.getLease(holder);
2848 throw new LeaseExpiredException(
2849 "No lease on " + src + ": File does not exist. "
2850 + (lease != null ? lease.toString()
2851 : "Holder " + holder + " does not have any open files."));
2852 }
2853 final INodeFile file = inode.asFile();
2854 if (!file.isUnderConstruction()) {
2855 Lease lease = leaseManager.getLease(holder);
2856 throw new LeaseExpiredException(
2857 "No lease on " + src + ": File is not open for writing. "
2858 + (lease != null ? lease.toString()
2859 : "Holder " + holder + " does not have any open files."));
2860 }
2861 INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)file;
2862 if (holder != null && !pendingFile.getClientName().equals(holder)) {
2863 throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
2864 + pendingFile.getClientName() + " but is accessed by " + holder);
2865 }
2866 INodeId.checkId(fileId, pendingFile);
2867 return pendingFile;
2868 }
2869
2870 /**
2871 * Complete in-progress write to the given file.
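* <p>
* Reached via {@code ClientProtocol#complete} when the client closes its output
* stream. A hedged sketch of the client-side retry pattern ({@code namenode} is
* assumed to be a {@code ClientProtocol} proxy and the other names are
* placeholders):
* <pre>{@code
* boolean closed = false;
* while (!closed) {   // the real DFSClient bounds this loop with its retry policy
*   closed = namenode.complete(src, clientName, lastBlock, fileId);
* }
* }</pre>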
2872 * @return true if successful, false if the client should continue to retry 2873 * (e.g if not all blocks have reached minimum replication yet) 2874 * @throws IOException on error (eg lease mismatch, file not open, file deleted) 2875 */ 2876 boolean completeFile(String src, String holder, 2877 ExtendedBlock last, long fileId) 2878 throws SafeModeException, UnresolvedLinkException, IOException { 2879 if (NameNode.stateChangeLog.isDebugEnabled()) { 2880 NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + 2881 src + " for " + holder); 2882 } 2883 checkBlock(last); 2884 boolean success = false; 2885 checkOperation(OperationCategory.WRITE); 2886 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2887 writeLock(); 2888 try { 2889 checkOperation(OperationCategory.WRITE); 2890 checkNameNodeSafeMode("Cannot complete file " + src); 2891 src = FSDirectory.resolvePath(src, pathComponents, dir); 2892 success = completeFileInternal(src, holder, 2893 ExtendedBlock.getLocalBlock(last), fileId); 2894 } finally { 2895 writeUnlock(); 2896 } 2897 getEditLog().logSync(); 2898 if (success) { 2899 NameNode.stateChangeLog.info("DIR* completeFile: " + src 2900 + " is closed by " + holder); 2901 } 2902 return success; 2903 } 2904 2905 private boolean completeFileInternal(String src, 2906 String holder, Block last, long fileId) throws SafeModeException, 2907 UnresolvedLinkException, IOException { 2908 assert hasWriteLock(); 2909 final INodesInPath iip = dir.getLastINodeInPath(src); 2910 final INodeFileUnderConstruction pendingFile; 2911 try { 2912 pendingFile = checkLease(src, fileId, holder, iip.getINode(0)); 2913 } catch (LeaseExpiredException lee) { 2914 final INode inode = dir.getINode(src); 2915 if (inode != null 2916 && inode.isFile() 2917 && !inode.asFile().isUnderConstruction()) { 2918 // This could be a retry RPC - i.e the client tried to close 2919 // the file, but missed the RPC response. Thus, it is trying 2920 // again to close the file. If the file still exists and 2921 // the client's view of the last block matches the actual 2922 // last block, then we'll treat it as a successful close. 2923 // See HDFS-3031. 2924 final Block realLastBlock = inode.asFile().getLastBlock(); 2925 if (Block.matchingIdAndGenStamp(last, realLastBlock)) { 2926 NameNode.stateChangeLog.info("DIR* completeFile: " + 2927 "request from " + holder + " to complete " + src + 2928 " which is already closed. But, it appears to be an RPC " + 2929 "retry. Returning success"); 2930 return true; 2931 } 2932 } 2933 throw lee; 2934 } 2935 // Check the state of the penultimate block. It should be completed 2936 // before attempting to complete the last one. 2937 if (!checkFileProgress(pendingFile, false)) { 2938 return false; 2939 } 2940 2941 // commit the last block and complete it if it has minimum replicas 2942 commitOrCompleteLastBlock(pendingFile, last); 2943 2944 if (!checkFileProgress(pendingFile, true)) { 2945 return false; 2946 } 2947 2948 finalizeINodeFileUnderConstruction(src, pendingFile, 2949 iip.getLatestSnapshot()); 2950 return true; 2951 } 2952 2953 /** 2954 * Save allocated block at the given pending filename 2955 * 2956 * @param src path to the file 2957 * @param inodesInPath representing each of the components of src. 2958 * The last INode is the INode for the file. 
2959 * @throws QuotaExceededException If addition of block exceeds space quota 2960 */ 2961 BlockInfo saveAllocatedBlock(String src, INodesInPath inodes, 2962 Block newBlock, DatanodeStorageInfo[] targets) 2963 throws IOException { 2964 assert hasWriteLock(); 2965 BlockInfo b = dir.addBlock(src, inodes, newBlock, targets); 2966 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". " 2967 + getBlockPoolId() + " " + b); 2968 DatanodeStorageInfo.incrementBlocksScheduled(targets); 2969 return b; 2970 } 2971 2972 /** 2973 * Create new block with a unique block id and a new generation stamp. 2974 */ 2975 Block createNewBlock() throws IOException { 2976 assert hasWriteLock(); 2977 Block b = new Block(nextBlockId(), 0, 0); 2978 // Increment the generation stamp for every new block. 2979 b.setGenerationStamp(nextGenerationStamp(false)); 2980 return b; 2981 } 2982 2983 /** 2984 * Check that the indicated file's blocks are present and 2985 * replicated. If not, return false. If checkall is true, then check 2986 * all blocks, otherwise check only penultimate block. 2987 */ 2988 boolean checkFileProgress(INodeFile v, boolean checkall) { 2989 readLock(); 2990 try { 2991 if (checkall) { 2992 // 2993 // check all blocks of the file. 2994 // 2995 for (BlockInfo block: v.getBlocks()) { 2996 if (!block.isComplete()) { 2997 LOG.info("BLOCK* checkFileProgress: " + block 2998 + " has not reached minimal replication " 2999 + blockManager.minReplication); 3000 return false; 3001 } 3002 } 3003 } else { 3004 // 3005 // check the penultimate block of this file 3006 // 3007 BlockInfo b = v.getPenultimateBlock(); 3008 if (b != null && !b.isComplete()) { 3009 LOG.warn("BLOCK* checkFileProgress: " + b 3010 + " has not reached minimal replication " 3011 + blockManager.minReplication); 3012 return false; 3013 } 3014 } 3015 return true; 3016 } finally { 3017 readUnlock(); 3018 } 3019 } 3020 3021 //////////////////////////////////////////////////////////////// 3022 // Here's how to handle block-copy failure during client write: 3023 // -- As usual, the client's write should result in a streaming 3024 // backup write to a k-machine sequence. 3025 // -- If one of the backup machines fails, no worries. Fail silently. 3026 // -- Before client is allowed to close and finalize file, make sure 3027 // that the blocks are backed up. Namenode may have to issue specific backup 3028 // commands to make up for earlier datanode failures. Once all copies 3029 // are made, edit namespace and return to client. 3030 //////////////////////////////////////////////////////////////// 3031 3032 /** 3033 * Change the indicated filename. 3034 * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead. 
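 * <p>
 * Note: this variant reports failure by returning {@code false}, while
 * {@link #renameTo(String, String, Options.Rename...)} reports failure by
 * throwing an exception. Illustrative comparison (paths are examples only):
 * <pre>
 *   boolean renamed = renameTo("/a", "/b");          // legacy: check the return value
 *   renameTo("/a", "/b", Options.Rename.OVERWRITE);  // preferred: failures are thrown
 * </pre>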
3035 */ 3036 @Deprecated 3037 boolean renameTo(String src, String dst) 3038 throws IOException, UnresolvedLinkException { 3039 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 3040 if (cacheEntry != null && cacheEntry.isSuccess()) { 3041 return true; // Return previous response 3042 } 3043 boolean ret = false; 3044 try { 3045 ret = renameToInt(src, dst, cacheEntry != null); 3046 } catch (AccessControlException e) { 3047 logAuditEvent(false, "rename", src, dst, null); 3048 throw e; 3049 } finally { 3050 RetryCache.setState(cacheEntry, ret); 3051 } 3052 return ret; 3053 } 3054 3055 private boolean renameToInt(String src, String dst, boolean logRetryCache) 3056 throws IOException, UnresolvedLinkException { 3057 if (NameNode.stateChangeLog.isDebugEnabled()) { 3058 NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src + 3059 " to " + dst); 3060 } 3061 if (!DFSUtil.isValidName(dst)) { 3062 throw new IOException("Invalid name: " + dst); 3063 } 3064 FSPermissionChecker pc = getPermissionChecker(); 3065 checkOperation(OperationCategory.WRITE); 3066 byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src); 3067 byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst); 3068 boolean status = false; 3069 HdfsFileStatus resultingStat = null; 3070 writeLock(); 3071 try { 3072 checkOperation(OperationCategory.WRITE); 3073 checkNameNodeSafeMode("Cannot rename " + src); 3074 src = FSDirectory.resolvePath(src, srcComponents, dir); 3075 dst = FSDirectory.resolvePath(dst, dstComponents, dir); 3076 checkOperation(OperationCategory.WRITE); 3077 status = renameToInternal(pc, src, dst, logRetryCache); 3078 if (status) { 3079 resultingStat = getAuditFileInfo(dst, false); 3080 } 3081 } finally { 3082 writeUnlock(); 3083 } 3084 getEditLog().logSync(); 3085 if (status) { 3086 logAuditEvent(true, "rename", src, dst, resultingStat); 3087 } 3088 return status; 3089 } 3090 3091 /** @deprecated See {@link #renameTo(String, String)} */ 3092 @Deprecated 3093 private boolean renameToInternal(FSPermissionChecker pc, String src, 3094 String dst, boolean logRetryCache) throws IOException, 3095 UnresolvedLinkException { 3096 assert hasWriteLock(); 3097 if (isPermissionEnabled) { 3098 //We should not be doing this. This is move() not renameTo(). 3099 //but for now, 3100 //NOTE: yes, this is bad! it's assuming much lower level behavior 3101 // of rewriting the dst 3102 String actualdst = dir.isDir(dst)? 3103 dst + Path.SEPARATOR + new Path(src).getName(): dst; 3104 // Rename does not operates on link targets 3105 // Do not resolveLink when checking permissions of src and dst 3106 // Check write access to parent of src 3107 checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false); 3108 // Check write access to ancestor of dst 3109 checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null, 3110 false); 3111 } 3112 3113 if (dir.renameTo(src, dst, logRetryCache)) { 3114 return true; 3115 } 3116 return false; 3117 } 3118 3119 3120 /** Rename src to dst */ 3121 void renameTo(String src, String dst, Options.Rename... 
options) 3122 throws IOException, UnresolvedLinkException { 3123 if (NameNode.stateChangeLog.isDebugEnabled()) { 3124 NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - " 3125 + src + " to " + dst); 3126 } 3127 if (!DFSUtil.isValidName(dst)) { 3128 throw new InvalidPathException("Invalid name: " + dst); 3129 } 3130 final FSPermissionChecker pc = getPermissionChecker(); 3131 3132 checkOperation(OperationCategory.WRITE); 3133 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 3134 if (cacheEntry != null && cacheEntry.isSuccess()) { 3135 return; // Return previous response 3136 } 3137 byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src); 3138 byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst); 3139 HdfsFileStatus resultingStat = null; 3140 boolean success = false; 3141 writeLock(); 3142 try { 3143 checkOperation(OperationCategory.WRITE); 3144 checkNameNodeSafeMode("Cannot rename " + src); 3145 src = FSDirectory.resolvePath(src, srcComponents, dir); 3146 dst = FSDirectory.resolvePath(dst, dstComponents, dir); 3147 renameToInternal(pc, src, dst, cacheEntry != null, options); 3148 resultingStat = getAuditFileInfo(dst, false); 3149 success = true; 3150 } finally { 3151 writeUnlock(); 3152 RetryCache.setState(cacheEntry, success); 3153 } 3154 getEditLog().logSync(); 3155 if (resultingStat != null) { 3156 StringBuilder cmd = new StringBuilder("rename options="); 3157 for (Rename option : options) { 3158 cmd.append(option.value()).append(" "); 3159 } 3160 logAuditEvent(true, cmd.toString(), src, dst, resultingStat); 3161 } 3162 } 3163 3164 private void renameToInternal(FSPermissionChecker pc, String src, String dst, 3165 boolean logRetryCache, Options.Rename... options) throws IOException { 3166 assert hasWriteLock(); 3167 if (isPermissionEnabled) { 3168 // Rename does not operates on link targets 3169 // Do not resolveLink when checking permissions of src and dst 3170 // Check write access to parent of src 3171 checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false); 3172 // Check write access to ancestor of dst 3173 checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false); 3174 } 3175 3176 dir.renameTo(src, dst, logRetryCache, options); 3177 } 3178 3179 /** 3180 * Remove the indicated file from namespace. 
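 * <p>
 * Like {@link #renameTo(String, String)}, this operation is made idempotent
 * across client RPC retries via the retry cache. Condensed sketch of the
 * pattern used in the method body below (illustrative only):
 * <pre>
 *   CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
 *   if (cacheEntry != null && cacheEntry.isSuccess()) {
 *     return true;                            // a previous attempt already succeeded
 *   }
 *   boolean ret = false;
 *   try {
 *     ret = deleteInt(src, recursive, cacheEntry != null);
 *   } finally {
 *     RetryCache.setState(cacheEntry, ret);   // record the outcome for future retries
 *   }
 * </pre>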
3181 * 3182 * @see ClientProtocol#delete(String, boolean) for detailed description and 3183 * description of exceptions 3184 */ 3185 boolean delete(String src, boolean recursive) 3186 throws AccessControlException, SafeModeException, 3187 UnresolvedLinkException, IOException { 3188 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 3189 if (cacheEntry != null && cacheEntry.isSuccess()) { 3190 return true; // Return previous response 3191 } 3192 boolean ret = false; 3193 try { 3194 ret = deleteInt(src, recursive, cacheEntry != null); 3195 } catch (AccessControlException e) { 3196 logAuditEvent(false, "delete", src); 3197 throw e; 3198 } finally { 3199 RetryCache.setState(cacheEntry, ret); 3200 } 3201 return ret; 3202 } 3203 3204 private boolean deleteInt(String src, boolean recursive, boolean logRetryCache) 3205 throws AccessControlException, SafeModeException, 3206 UnresolvedLinkException, IOException { 3207 if (NameNode.stateChangeLog.isDebugEnabled()) { 3208 NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src); 3209 } 3210 boolean status = deleteInternal(src, recursive, true, logRetryCache); 3211 if (status) { 3212 logAuditEvent(true, "delete", src); 3213 } 3214 return status; 3215 } 3216 3217 private FSPermissionChecker getPermissionChecker() 3218 throws AccessControlException { 3219 try { 3220 return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser()); 3221 } catch (IOException ioe) { 3222 throw new AccessControlException(ioe); 3223 } 3224 } 3225 3226 /** 3227 * Remove a file/directory from the namespace. 3228 * <p> 3229 * For large directories, deletion is incremental. The blocks under 3230 * the directory are collected and deleted a small number at a time holding 3231 * the {@link FSNamesystem} lock. 3232 * <p> 3233 * For small directory or file the deletion is done in one shot. 
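 * <p>
 * The incremental phase is carried out by {@code removeBlocks(BlocksMapUpdateInfo)},
 * which drops and reacquires the write lock between batches so that other
 * waiters can make progress. Condensed sketch of that loop (illustrative only):
 * <pre>
 *   Iterator<Block> iter = collectedBlocks.getToDeleteList().iterator();
 *   while (iter.hasNext()) {
 *     writeLock();
 *     try {
 *       for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
 *         blockManager.removeBlock(iter.next());
 *       }
 *     } finally {
 *       writeUnlock();
 *     }
 *   }
 * </pre>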
3234 * 3235 * @see ClientProtocol#delete(String, boolean) for description of exceptions 3236 */ 3237 private boolean deleteInternal(String src, boolean recursive, 3238 boolean enforcePermission, boolean logRetryCache) 3239 throws AccessControlException, SafeModeException, UnresolvedLinkException, 3240 IOException { 3241 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); 3242 List<INode> removedINodes = new ChunkedArrayList<INode>(); 3243 FSPermissionChecker pc = getPermissionChecker(); 3244 checkOperation(OperationCategory.WRITE); 3245 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3246 boolean ret = false; 3247 writeLock(); 3248 try { 3249 checkOperation(OperationCategory.WRITE); 3250 checkNameNodeSafeMode("Cannot delete " + src); 3251 src = FSDirectory.resolvePath(src, pathComponents, dir); 3252 if (!recursive && dir.isNonEmptyDirectory(src)) { 3253 throw new IOException(src + " is non empty"); 3254 } 3255 if (enforcePermission && isPermissionEnabled) { 3256 checkPermission(pc, src, false, null, FsAction.WRITE, null, 3257 FsAction.ALL, false); 3258 } 3259 // Unlink the target directory from directory tree 3260 if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) { 3261 return false; 3262 } 3263 ret = true; 3264 } finally { 3265 writeUnlock(); 3266 } 3267 getEditLog().logSync(); 3268 removeBlocks(collectedBlocks); // Incremental deletion of blocks 3269 collectedBlocks.clear(); 3270 dir.writeLock(); 3271 try { 3272 dir.removeFromInodeMap(removedINodes); 3273 } finally { 3274 dir.writeUnlock(); 3275 } 3276 removedINodes.clear(); 3277 if (NameNode.stateChangeLog.isDebugEnabled()) { 3278 NameNode.stateChangeLog.debug("DIR* Namesystem.delete: " 3279 + src +" is removed"); 3280 } 3281 return ret; 3282 } 3283 3284 /** 3285 * From the given list, incrementally remove the blocks from blockManager 3286 * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to 3287 * ensure that other waiters on the lock can get in. 
See HDFS-2938 3288 * 3289 * @param blocks 3290 * An instance of {@link BlocksMapUpdateInfo} which contains a list 3291 * of blocks that need to be removed from blocksMap 3292 */ 3293 void removeBlocks(BlocksMapUpdateInfo blocks) { 3294 List<Block> toDeleteList = blocks.getToDeleteList(); 3295 Iterator<Block> iter = toDeleteList.iterator(); 3296 while (iter.hasNext()) { 3297 writeLock(); 3298 try { 3299 for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) { 3300 blockManager.removeBlock(iter.next()); 3301 } 3302 } finally { 3303 writeUnlock(); 3304 } 3305 } 3306 } 3307 3308 /** 3309 * Remove leases, inodes and blocks related to a given path 3310 * @param src The given path 3311 * @param blocks Containing the list of blocks to be deleted from blocksMap 3312 * @param removedINodes Containing the list of inodes to be removed from 3313 * inodesMap 3314 */ 3315 void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks, 3316 List<INode> removedINodes) { 3317 assert hasWriteLock(); 3318 leaseManager.removeLeaseWithPrefixPath(src); 3319 // remove inodes from inodesMap 3320 if (removedINodes != null) { 3321 dir.removeFromInodeMap(removedINodes); 3322 removedINodes.clear(); 3323 } 3324 if (blocks == null) { 3325 return; 3326 } 3327 3328 removeBlocksAndUpdateSafemodeTotal(blocks); 3329 } 3330 3331 /** 3332 * Removes the blocks from blocksmap and updates the safemode blocks total 3333 * 3334 * @param blocks 3335 * An instance of {@link BlocksMapUpdateInfo} which contains a list 3336 * of blocks that need to be removed from blocksMap 3337 */ 3338 void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) { 3339 assert hasWriteLock(); 3340 // In the case that we are a Standby tailing edits from the 3341 // active while in safe-mode, we need to track the total number 3342 // of blocks and safe blocks in the system. 3343 boolean trackBlockCounts = isSafeModeTrackingBlocks(); 3344 int numRemovedComplete = 0, numRemovedSafe = 0; 3345 3346 for (Block b : blocks.getToDeleteList()) { 3347 if (trackBlockCounts) { 3348 BlockInfo bi = getStoredBlock(b); 3349 if (bi.isComplete()) { 3350 numRemovedComplete++; 3351 if (bi.numNodes() >= blockManager.minReplication) { 3352 numRemovedSafe++; 3353 } 3354 } 3355 } 3356 blockManager.removeBlock(b); 3357 } 3358 if (trackBlockCounts) { 3359 if (LOG.isDebugEnabled()) { 3360 LOG.debug("Adjusting safe-mode totals for deletion." 3361 + "decreasing safeBlocks by " + numRemovedSafe 3362 + ", totalBlocks by " + numRemovedComplete); 3363 } 3364 adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); 3365 } 3366 } 3367 3368 /** 3369 * @see SafeModeInfo#shouldIncrementallyTrackBlocks 3370 */ 3371 private boolean isSafeModeTrackingBlocks() { 3372 if (!haEnabled) { 3373 // Never track blocks incrementally in non-HA code. 3374 return false; 3375 } 3376 SafeModeInfo sm = this.safeMode; 3377 return sm != null && sm.shouldIncrementallyTrackBlocks(); 3378 } 3379 3380 /** 3381 * Get the file info for a specific file. 3382 * 3383 * @param src The string representation of the path to the file 3384 * @param resolveLink whether to throw UnresolvedLinkException 3385 * if src refers to a symlink 3386 * 3387 * @throws AccessControlException if access is denied 3388 * @throws UnresolvedLinkException if a symlink is encountered. 
3389 * 3390 * @return object containing information regarding the file 3391 * or null if file not found 3392 * @throws StandbyException 3393 */ 3394 HdfsFileStatus getFileInfo(String src, boolean resolveLink) 3395 throws AccessControlException, UnresolvedLinkException, 3396 StandbyException, IOException { 3397 if (!DFSUtil.isValidName(src)) { 3398 throw new InvalidPathException("Invalid file name: " + src); 3399 } 3400 HdfsFileStatus stat = null; 3401 FSPermissionChecker pc = getPermissionChecker(); 3402 checkOperation(OperationCategory.READ); 3403 if (!DFSUtil.isValidName(src)) { 3404 throw new InvalidPathException("Invalid file name: " + src); 3405 } 3406 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3407 readLock(); 3408 try { 3409 checkOperation(OperationCategory.READ); 3410 src = FSDirectory.resolvePath(src, pathComponents, dir); 3411 if (isPermissionEnabled) { 3412 checkPermission(pc, src, false, null, null, null, null, resolveLink); 3413 } 3414 stat = dir.getFileInfo(src, resolveLink); 3415 } catch (AccessControlException e) { 3416 logAuditEvent(false, "getfileinfo", src); 3417 throw e; 3418 } finally { 3419 readUnlock(); 3420 } 3421 logAuditEvent(true, "getfileinfo", src); 3422 return stat; 3423 } 3424 3425 /** 3426 * Returns true if the file is closed 3427 */ 3428 boolean isFileClosed(String src) 3429 throws AccessControlException, UnresolvedLinkException, 3430 StandbyException, IOException { 3431 FSPermissionChecker pc = getPermissionChecker(); 3432 checkOperation(OperationCategory.READ); 3433 readLock(); 3434 try { 3435 checkOperation(OperationCategory.READ); 3436 if (isPermissionEnabled) { 3437 checkTraverse(pc, src); 3438 } 3439 return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction(); 3440 } catch (AccessControlException e) { 3441 if (isAuditEnabled() && isExternalInvocation()) { 3442 logAuditEvent(false, "isFileClosed", src); 3443 } 3444 throw e; 3445 } finally { 3446 readUnlock(); 3447 } 3448 } 3449 3450 /** 3451 * Create all the necessary directories 3452 */ 3453 boolean mkdirs(String src, PermissionStatus permissions, 3454 boolean createParent) throws IOException, UnresolvedLinkException { 3455 boolean ret = false; 3456 try { 3457 ret = mkdirsInt(src, permissions, createParent); 3458 } catch (AccessControlException e) { 3459 logAuditEvent(false, "mkdirs", src); 3460 throw e; 3461 } 3462 return ret; 3463 } 3464 3465 private boolean mkdirsInt(String src, PermissionStatus permissions, 3466 boolean createParent) throws IOException, UnresolvedLinkException { 3467 if(NameNode.stateChangeLog.isDebugEnabled()) { 3468 NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src); 3469 } 3470 if (!DFSUtil.isValidName(src)) { 3471 throw new InvalidPathException(src); 3472 } 3473 FSPermissionChecker pc = getPermissionChecker(); 3474 checkOperation(OperationCategory.WRITE); 3475 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3476 HdfsFileStatus resultingStat = null; 3477 boolean status = false; 3478 writeLock(); 3479 try { 3480 checkOperation(OperationCategory.WRITE); 3481 checkNameNodeSafeMode("Cannot create directory " + src); 3482 src = FSDirectory.resolvePath(src, pathComponents, dir); 3483 status = mkdirsInternal(pc, src, permissions, createParent); 3484 if (status) { 3485 resultingStat = dir.getFileInfo(src, false); 3486 } 3487 } finally { 3488 writeUnlock(); 3489 } 3490 getEditLog().logSync(); 3491 if (status) { 3492 logAuditEvent(true, "mkdirs", src, null, resultingStat); 3493 } 3494 return status; 
3495 } 3496 3497 /** 3498 * Create all the necessary directories 3499 */ 3500 private boolean mkdirsInternal(FSPermissionChecker pc, String src, 3501 PermissionStatus permissions, boolean createParent) 3502 throws IOException, UnresolvedLinkException { 3503 assert hasWriteLock(); 3504 if (isPermissionEnabled) { 3505 checkTraverse(pc, src); 3506 } 3507 if (dir.isDirMutable(src)) { 3508 // all the users of mkdirs() are used to expect 'true' even if 3509 // a new directory is not created. 3510 return true; 3511 } 3512 if (isPermissionEnabled) { 3513 checkAncestorAccess(pc, src, FsAction.WRITE); 3514 } 3515 if (!createParent) { 3516 verifyParentDir(src); 3517 } 3518 3519 // validate that we have enough inodes. This is, at best, a 3520 // heuristic because the mkdirs() operation might need to 3521 // create multiple inodes. 3522 checkFsObjectLimit(); 3523 3524 if (!dir.mkdirs(src, permissions, false, now())) { 3525 throw new IOException("Failed to create directory: " + src); 3526 } 3527 return true; 3528 } 3529 3530 /** 3531 * Get the content summary for a specific file/dir. 3532 * 3533 * @param src The string representation of the path to the file 3534 * 3535 * @throws AccessControlException if access is denied 3536 * @throws UnresolvedLinkException if a symlink is encountered. 3537 * @throws FileNotFoundException if no file exists 3538 * @throws StandbyException 3539 * @throws IOException for issues with writing to the audit log 3540 * 3541 * @return object containing information regarding the file 3542 * or null if file not found 3543 */ 3544 ContentSummary getContentSummary(String src) throws IOException { 3545 FSPermissionChecker pc = getPermissionChecker(); 3546 checkOperation(OperationCategory.READ); 3547 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3548 readLock(); 3549 boolean success = true; 3550 try { 3551 checkOperation(OperationCategory.READ); 3552 src = FSDirectory.resolvePath(src, pathComponents, dir); 3553 if (isPermissionEnabled) { 3554 checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE); 3555 } 3556 return dir.getContentSummary(src); 3557 3558 } catch (AccessControlException ace) { 3559 success = false; 3560 throw ace; 3561 } finally { 3562 readUnlock(); 3563 logAuditEvent(success, "contentSummary", src); 3564 } 3565 } 3566 3567 /** 3568 * Set the namespace quota and diskspace quota for a directory. 3569 * See {@link ClientProtocol#setQuota(String, long, long)} for the 3570 * contract. 3571 * 3572 * Note: This does not support ".inodes" relative path. 3573 */ 3574 void setQuota(String path, long nsQuota, long dsQuota) 3575 throws IOException, UnresolvedLinkException { 3576 checkSuperuserPrivilege(); 3577 checkOperation(OperationCategory.WRITE); 3578 writeLock(); 3579 try { 3580 checkOperation(OperationCategory.WRITE); 3581 checkNameNodeSafeMode("Cannot set quota on " + path); 3582 dir.setQuota(path, nsQuota, dsQuota); 3583 } finally { 3584 writeUnlock(); 3585 } 3586 getEditLog().logSync(); 3587 } 3588 3589 /** Persist all metadata about this file. 3590 * @param src The string representation of the path 3591 * @param clientName The string representation of the client 3592 * @param lastBlockLength The length of the last block 3593 * under construction reported from client. 
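 * A positive value is applied to the file's last block via
 * {@code updateLengthOfLastBlock(lastBlockLength)} before the block list is
 * persisted. Illustrative client-side trigger (hedged; the exact client
 * plumbing lives outside this class and {@code fs} is assumed to be a
 * {@code DistributedFileSystem}):
 * <pre>
 *   FSDataOutputStream out = fs.create(path);
 *   out.write(data);
 *   ((HdfsDataOutputStream) out).hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH));
 * </pre>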
3594 * @throws IOException if path does not exist 3595 */ 3596 void fsync(String src, String clientName, long lastBlockLength) 3597 throws IOException, UnresolvedLinkException { 3598 NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); 3599 checkOperation(OperationCategory.WRITE); 3600 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3601 writeLock(); 3602 try { 3603 checkOperation(OperationCategory.WRITE); 3604 checkNameNodeSafeMode("Cannot fsync file " + src); 3605 src = FSDirectory.resolvePath(src, pathComponents, dir); 3606 INodeFileUnderConstruction pendingFile = checkLease(src, clientName); 3607 if (lastBlockLength > 0) { 3608 pendingFile.updateLengthOfLastBlock(lastBlockLength); 3609 } 3610 dir.persistBlocks(src, pendingFile, false); 3611 } finally { 3612 writeUnlock(); 3613 } 3614 getEditLog().logSync(); 3615 } 3616 3617 /** 3618 * Move a file that is being written to be immutable. 3619 * @param src The filename 3620 * @param lease The lease for the client creating the file 3621 * @param recoveryLeaseHolder reassign lease to this holder if the last block 3622 * needs recovery; keep current holder if null. 3623 * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal 3624 * replication;<br> 3625 * RecoveryInProgressException if lease recovery is in progress.<br> 3626 * IOException in case of an error. 3627 * @return true if file has been successfully finalized and closed or 3628 * false if block recovery has been initiated. Since the lease owner 3629 * has been changed and logged, caller should call logSync(). 3630 */ 3631 boolean internalReleaseLease(Lease lease, String src, 3632 String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 3633 IOException, UnresolvedLinkException { 3634 LOG.info("Recovering " + lease + ", src=" + src); 3635 assert !isInSafeMode(); 3636 assert hasWriteLock(); 3637 3638 final INodesInPath iip = dir.getLastINodeInPath(src); 3639 final INodeFileUnderConstruction pendingFile 3640 = INodeFileUnderConstruction.valueOf(iip.getINode(0), src); 3641 int nrBlocks = pendingFile.numBlocks(); 3642 BlockInfo[] blocks = pendingFile.getBlocks(); 3643 3644 int nrCompleteBlocks; 3645 BlockInfo curBlock = null; 3646 for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) { 3647 curBlock = blocks[nrCompleteBlocks]; 3648 if(!curBlock.isComplete()) 3649 break; 3650 assert blockManager.checkMinReplication(curBlock) : 3651 "A COMPLETE block is not minimally replicated in " + src; 3652 } 3653 3654 // If there are no incomplete blocks associated with this file, 3655 // then reap lease immediately and close the file. 3656 if(nrCompleteBlocks == nrBlocks) { 3657 finalizeINodeFileUnderConstruction(src, pendingFile, 3658 iip.getLatestSnapshot()); 3659 NameNode.stateChangeLog.warn("BLOCK*" 3660 + " internalReleaseLease: All existing blocks are COMPLETE," 3661 + " lease removed, file closed."); 3662 return true; // closed! 3663 } 3664 3665 // Only the last and the penultimate blocks may be in non COMPLETE state. 3666 // If the penultimate block is not COMPLETE, then it must be COMMITTED. 
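    // Spelled out, the only configurations accepted past this point are
    // (illustrative enumeration of the check below):
    //   nrCompleteBlocks == nrBlocks - 1 : only the last block is not COMPLETE
    //   nrCompleteBlocks == nrBlocks - 2 : the last block is not COMPLETE and
    //                                      the penultimate block is COMMITTED
    // Anything else means more than the last two blocks are unfinished (or
    // the penultimate block is still under construction), and the recovery
    // request is rejected below.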
3667 if(nrCompleteBlocks < nrBlocks - 2 || 3668 nrCompleteBlocks == nrBlocks - 2 && 3669 curBlock != null && 3670 curBlock.getBlockUCState() != BlockUCState.COMMITTED) { 3671 final String message = "DIR* NameSystem.internalReleaseLease: " 3672 + "attempt to release a create lock on " 3673 + src + " but file is already closed."; 3674 NameNode.stateChangeLog.warn(message); 3675 throw new IOException(message); 3676 } 3677 3678 // At this point the last block is not COMPLETE, and 3679 // the penultimate block, if it exists, is either COMPLETE or COMMITTED. 3680 final BlockInfo lastBlock = pendingFile.getLastBlock(); 3681 BlockUCState lastBlockState = lastBlock.getBlockUCState(); 3682 BlockInfo penultimateBlock = pendingFile.getPenultimateBlock(); 3683 boolean penultimateBlockMinReplication; 3684 BlockUCState penultimateBlockState; 3685 if (penultimateBlock == null) { 3686 penultimateBlockState = BlockUCState.COMPLETE; 3687 // If penultimate block doesn't exist then its minReplication is met 3688 penultimateBlockMinReplication = true; 3689 } else { 3690 penultimateBlockState = BlockUCState.COMMITTED; 3691 penultimateBlockMinReplication = 3692 blockManager.checkMinReplication(penultimateBlock); 3693 } 3694 assert penultimateBlockState == BlockUCState.COMPLETE || 3695 penultimateBlockState == BlockUCState.COMMITTED : 3696 "Unexpected state of penultimate block in " + src; 3697 3698 switch(lastBlockState) { 3699 case COMPLETE: 3700 assert false : "Already checked that the last block is incomplete"; 3701 break; 3702 case COMMITTED: 3703 // Close file if committed blocks are minimally replicated 3704 if(penultimateBlockMinReplication && 3705 blockManager.checkMinReplication(lastBlock)) { 3706 finalizeINodeFileUnderConstruction(src, pendingFile, 3707 iip.getLatestSnapshot()); 3708 NameNode.stateChangeLog.warn("BLOCK*" 3709 + " internalReleaseLease: Committed blocks are minimally replicated," 3710 + " lease removed, file closed."); 3711 return true; // closed! 3712 } 3713 // Cannot close file right now, since some blocks 3714 // are not yet minimally replicated. 3715 // This may potentially cause infinite loop in lease recovery 3716 // if there are no valid replicas on data-nodes. 3717 String message = "DIR* NameSystem.internalReleaseLease: " + 3718 "Failed to release lease for file " + src + 3719 ". Committed blocks are waiting to be minimally replicated." + 3720 " Try again later."; 3721 NameNode.stateChangeLog.warn(message); 3722 throw new AlreadyBeingCreatedException(message); 3723 case UNDER_CONSTRUCTION: 3724 case UNDER_RECOVERY: 3725 final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock; 3726 // setup the last block locations from the blockManager if not known 3727 if (uc.getNumExpectedLocations() == 0) { 3728 uc.setExpectedLocations(blockManager.getStorages(lastBlock)); 3729 } 3730 3731 if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) { 3732 // No datanode has reported in for this block. 3733 // Maybe the client crashed before writing any data to the pipeline. 3734 // This block doesn't need any recovery. 3735 // We can remove this block and close the file.
3736 pendingFile.removeLastBlock(lastBlock); 3737 finalizeINodeFileUnderConstruction(src, pendingFile, 3738 iip.getLatestSnapshot()); 3739 NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: " 3740 + "Removed empty last block and closed file."); 3741 return true; 3742 } 3743 // start recovery of the last block for this file 3744 long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc)); 3745 lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile); 3746 uc.initializeBlockRecovery(blockRecoveryId); 3747 leaseManager.renewLease(lease); 3748 // Cannot close file right now, since the last block requires recovery. 3749 // This may potentially cause infinite loop in lease recovery 3750 // if there are no valid replicas on data-nodes. 3751 NameNode.stateChangeLog.warn( 3752 "DIR* NameSystem.internalReleaseLease: " + 3753 "File " + src + " has not been closed." + 3754 " Lease recovery is in progress. " + 3755 "RecoveryId = " + blockRecoveryId + " for block " + lastBlock); 3756 break; 3757 } 3758 return false; 3759 } 3760 3761 private Lease reassignLease(Lease lease, String src, String newHolder, 3762 INodeFileUnderConstruction pendingFile) { 3763 assert hasWriteLock(); 3764 if(newHolder == null) 3765 return lease; 3766 // The following transaction is not synced. Make sure it's sync'ed later. 3767 logReassignLease(lease.getHolder(), src, newHolder); 3768 return reassignLeaseInternal(lease, src, newHolder, pendingFile); 3769 } 3770 3771 Lease reassignLeaseInternal(Lease lease, String src, String newHolder, 3772 INodeFileUnderConstruction pendingFile) { 3773 assert hasWriteLock(); 3774 pendingFile.setClientName(newHolder); 3775 return leaseManager.reassignLease(lease, src, newHolder); 3776 } 3777 3778 private void commitOrCompleteLastBlock(final INodeFileUnderConstruction fileINode, 3779 final Block commitBlock) throws IOException { 3780 assert hasWriteLock(); 3781 if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) { 3782 return; 3783 } 3784 3785 // Adjust disk space consumption if required 3786 final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes(); 3787 if (diff > 0) { 3788 try { 3789 String path = leaseManager.findPath(fileINode); 3790 dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication()); 3791 } catch (IOException e) { 3792 LOG.warn("Unexpected exception while updating disk space.", e); 3793 } 3794 } 3795 } 3796 3797 private void finalizeINodeFileUnderConstruction(String src, 3798 INodeFileUnderConstruction pendingFile, Snapshot latestSnapshot) 3799 throws IOException, UnresolvedLinkException { 3800 assert hasWriteLock(); 3801 leaseManager.removeLease(pendingFile.getClientName(), src); 3802 3803 pendingFile = pendingFile.recordModification(latestSnapshot, 3804 dir.getINodeMap()); 3805 3806 // The file is no longer pending. 
3807 // Create permanent INode, update blocks 3808 final INodeFile newFile = pendingFile.toINodeFile(now()); 3809 dir.replaceINodeFile(src, pendingFile, newFile); 3810 3811 // close file and persist block allocations for this file 3812 dir.closeFile(src, newFile); 3813 3814 blockManager.checkReplication(newFile); 3815 } 3816 3817 @VisibleForTesting 3818 BlockInfo getStoredBlock(Block block) { 3819 return blockManager.getStoredBlock(block); 3820 } 3821 3822 @Override 3823 public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) { 3824 assert hasReadLock(); 3825 final BlockCollection bc = blockUC.getBlockCollection(); 3826 if (bc == null || !(bc instanceof INodeFileUnderConstruction)) { 3827 return false; 3828 } 3829 3830 INodeFileUnderConstruction inodeUC = (INodeFileUnderConstruction) blockUC 3831 .getBlockCollection(); 3832 String fullName = inodeUC.getName(); 3833 try { 3834 if (fullName != null && fullName.startsWith(Path.SEPARATOR) 3835 && dir.getINode(fullName) == inodeUC) { 3836 // If file exists in normal path then no need to look in snapshot 3837 return false; 3838 } 3839 } catch (UnresolvedLinkException e) { 3840 LOG.error("Error while resolving the link : " + fullName, e); 3841 return false; 3842 } 3843 /* 3844 * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and 3845 * bc is not in the current fsdirectory tree, bc must represent a snapshot 3846 * file. 3847 * 2. if fullName is not an absolute path, bc cannot be existent in the 3848 * current fsdirectory tree. 3849 * 3. if bc is not the current node associated with fullName, bc must be a 3850 * snapshot inode. 3851 */ 3852 return true; 3853 } 3854 3855 void commitBlockSynchronization(ExtendedBlock lastblock, 3856 long newgenerationstamp, long newlength, 3857 boolean closeFile, boolean deleteblock, DatanodeID[] newtargets, 3858 String[] newtargetstorages) 3859 throws IOException, UnresolvedLinkException { 3860 LOG.info("commitBlockSynchronization(lastblock=" + lastblock 3861 + ", newgenerationstamp=" + newgenerationstamp 3862 + ", newlength=" + newlength 3863 + ", newtargets=" + Arrays.asList(newtargets) 3864 + ", closeFile=" + closeFile 3865 + ", deleteBlock=" + deleteblock 3866 + ")"); 3867 checkOperation(OperationCategory.WRITE); 3868 String src = ""; 3869 writeLock(); 3870 try { 3871 checkOperation(OperationCategory.WRITE); 3872 // If a DN tries to commit to the standby, the recovery will 3873 // fail, and the next retry will succeed on the new NN. 3874 3875 checkNameNodeSafeMode( 3876 "Cannot commitBlockSynchronization while in safe mode"); 3877 final BlockInfo storedBlock = getStoredBlock( 3878 ExtendedBlock.getLocalBlock(lastblock)); 3879 if (storedBlock == null) { 3880 if (deleteblock) { 3881 // This may be a retry attempt so ignore the failure 3882 // to locate the block. 
3883 if (LOG.isDebugEnabled()) { 3884 LOG.debug("Block (=" + lastblock + ") not found"); 3885 } 3886 return; 3887 } else { 3888 throw new IOException("Block (=" + lastblock + ") not found"); 3889 } 3890 } 3891 INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile(); 3892 if (!iFile.isUnderConstruction() || storedBlock.isComplete()) { 3893 if (LOG.isDebugEnabled()) { 3894 LOG.debug("Unexpected block (=" + lastblock 3895 + ") since the file (=" + iFile.getLocalName() 3896 + ") is not under construction"); 3897 } 3898 return; 3899 } 3900 3901 long recoveryId = 3902 ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId(); 3903 if(recoveryId != newgenerationstamp) { 3904 throw new IOException("The recovery id " + newgenerationstamp 3905 + " does not match current recovery id " 3906 + recoveryId + " for block " + lastblock); 3907 } 3908 3909 INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)iFile; 3910 3911 if (deleteblock) { 3912 Block blockToDel = ExtendedBlock.getLocalBlock(lastblock); 3913 boolean remove = pendingFile.removeLastBlock(blockToDel); 3914 if (remove) { 3915 blockManager.removeBlockFromMap(storedBlock); 3916 } 3917 } 3918 else { 3919 // update last block 3920 storedBlock.setGenerationStamp(newgenerationstamp); 3921 storedBlock.setNumBytes(newlength); 3922 3923 // find the DatanodeDescriptor objects 3924 // There should be no locations in the blockManager till now because the 3925 // file is underConstruction 3926 ArrayList<DatanodeDescriptor> trimmedTargets = 3927 new ArrayList<DatanodeDescriptor>(newtargets.length); 3928 ArrayList<String> trimmedStorages = 3929 new ArrayList<String>(newtargets.length); 3930 if (newtargets.length > 0) { 3931 for (int i = 0; i < newtargets.length; ++i) { 3932 // try to get targetNode 3933 DatanodeDescriptor targetNode = 3934 blockManager.getDatanodeManager().getDatanode(newtargets[i]); 3935 if (targetNode != null) { 3936 trimmedTargets.add(targetNode); 3937 trimmedStorages.add(newtargetstorages[i]); 3938 } else if (LOG.isDebugEnabled()) { 3939 LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found"); 3940 } 3941 } 3942 } 3943 if ((closeFile) && !trimmedTargets.isEmpty()) { 3944 // the file is getting closed. Insert block locations into blockManager. 3945 // Otherwise fsck will report these blocks as MISSING, especially if the 3946 // blocksReceived from Datanodes take a long time to arrive. 
3947 for (int i = 0; i < trimmedTargets.size(); i++) { 3948 trimmedTargets.get(i).addBlock( 3949 trimmedStorages.get(i), storedBlock); 3950 } 3951 } 3952 3953 // add pipeline locations into the INodeUnderConstruction 3954 DatanodeStorageInfo[] trimmedStorageInfos = 3955 blockManager.getDatanodeManager().getDatanodeStorageInfos( 3956 trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]), 3957 trimmedStorages.toArray(new String[trimmedStorages.size()])); 3958 pendingFile.setLastBlock(storedBlock, trimmedStorageInfos); 3959 } 3960 3961 if (closeFile) { 3962 src = closeFileCommitBlocks(pendingFile, storedBlock); 3963 } else { 3964 // If this commit does not want to close the file, persist blocks 3965 src = persistBlocks(pendingFile, false); 3966 } 3967 } finally { 3968 writeUnlock(); 3969 } 3970 getEditLog().logSync(); 3971 if (closeFile) { 3972 LOG.info("commitBlockSynchronization(newblock=" + lastblock 3973 + ", file=" + src 3974 + ", newgenerationstamp=" + newgenerationstamp 3975 + ", newlength=" + newlength 3976 + ", newtargets=" + Arrays.asList(newtargets) + ") successful"); 3977 } else { 3978 LOG.info("commitBlockSynchronization(" + lastblock + ") successful"); 3979 } 3980 } 3981 3982 /** 3983 * 3984 * @param pendingFile 3985 * @param storedBlock 3986 * @return Path of the file that was closed. 3987 * @throws IOException 3988 */ 3989 @VisibleForTesting 3990 String closeFileCommitBlocks(INodeFileUnderConstruction pendingFile, 3991 BlockInfo storedBlock) 3992 throws IOException { 3993 3994 String src = leaseManager.findPath(pendingFile); 3995 3996 // commit the last block and complete it if it has minimum replicas 3997 commitOrCompleteLastBlock(pendingFile, storedBlock); 3998 3999 //remove lease, close file 4000 finalizeINodeFileUnderConstruction(src, pendingFile, 4001 Snapshot.findLatestSnapshot(pendingFile, null)); 4002 4003 return src; 4004 } 4005 4006 /** 4007 * Persist the block list for the given file. 4008 * 4009 * @param pendingFile 4010 * @return Path to the given file. 
4011 * @throws IOException 4012 */ 4013 @VisibleForTesting 4014 String persistBlocks(INodeFileUnderConstruction pendingFile, 4015 boolean logRetryCache) throws IOException { 4016 String src = leaseManager.findPath(pendingFile); 4017 dir.persistBlocks(src, pendingFile, logRetryCache); 4018 return src; 4019 } 4020 4021 /** 4022 * Renew the lease(s) held by the given client 4023 */ 4024 void renewLease(String holder) throws IOException { 4025 checkOperation(OperationCategory.WRITE); 4026 readLock(); 4027 try { 4028 checkOperation(OperationCategory.WRITE); 4029 checkNameNodeSafeMode("Cannot renew lease for " + holder); 4030 leaseManager.renewLease(holder); 4031 } finally { 4032 readUnlock(); 4033 } 4034 } 4035 4036 /** 4037 * Get a partial listing of the indicated directory 4038 * 4039 * @param src the directory name 4040 * @param startAfter the name to start after 4041 * @param needLocation if blockLocations need to be returned 4042 * @return a partial listing starting after startAfter 4043 * 4044 * @throws AccessControlException if access is denied 4045 * @throws UnresolvedLinkException if symbolic link is encountered 4046 * @throws IOException if other I/O error occurred 4047 */ 4048 DirectoryListing getListing(String src, byte[] startAfter, 4049 boolean needLocation) 4050 throws AccessControlException, UnresolvedLinkException, IOException { 4051 try { 4052 return getListingInt(src, startAfter, needLocation); 4053 } catch (AccessControlException e) { 4054 logAuditEvent(false, "listStatus", src); 4055 throw e; 4056 } 4057 } 4058 4059 private DirectoryListing getListingInt(String src, byte[] startAfter, 4060 boolean needLocation) 4061 throws AccessControlException, UnresolvedLinkException, IOException { 4062 DirectoryListing dl; 4063 FSPermissionChecker pc = getPermissionChecker(); 4064 checkOperation(OperationCategory.READ); 4065 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 4066 String startAfterString = new String(startAfter); 4067 readLock(); 4068 try { 4069 checkOperation(OperationCategory.READ); 4070 src = FSDirectory.resolvePath(src, pathComponents, dir); 4071 4072 // Get file name when startAfter is an INodePath 4073 if (FSDirectory.isReservedName(startAfterString)) { 4074 byte[][] startAfterComponents = FSDirectory 4075 .getPathComponentsForReservedPath(startAfterString); 4076 try { 4077 String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir); 4078 byte[][] regularPath = INode.getPathComponents(tmp); 4079 startAfter = regularPath[regularPath.length - 1]; 4080 } catch (IOException e) { 4081 // Possibly the inode is deleted 4082 throw new DirectoryListingStartAfterNotFoundException( 4083 "Can't find startAfter " + startAfterString); 4084 } 4085 } 4086 4087 if (isPermissionEnabled) { 4088 if (dir.isDir(src)) { 4089 checkPathAccess(pc, src, FsAction.READ_EXECUTE); 4090 } else { 4091 checkTraverse(pc, src); 4092 } 4093 } 4094 logAuditEvent(true, "listStatus", src); 4095 dl = dir.getListing(src, startAfter, needLocation); 4096 } finally { 4097 readUnlock(); 4098 } 4099 return dl; 4100 } 4101 4102 ///////////////////////////////////////////////////////// 4103 // 4104 // These methods are called by datanodes 4105 // 4106 ///////////////////////////////////////////////////////// 4107 /** 4108 * Register Datanode. 
4109 * <p> 4110 * The purpose of registration is to identify whether the new datanode 4111 * serves a new data storage, and will report new data block copies, 4112 * which the namenode was not aware of; or the datanode is a replacement 4113 * node for the data storage that was previously served by a different 4114 * or the same (in terms of host:port) datanode. 4115 * The data storages are distinguished by their storageIDs. When a new 4116 * data storage is reported the namenode issues a new unique storageID. 4117 * <p> 4118 * Finally, the namenode returns its namespaceID as the registrationID 4119 * for the datanodes. 4120 * namespaceID is a persistent attribute of the name space. 4121 * The registrationID is checked every time the datanode is communicating 4122 * with the namenode. 4123 * Datanodes with inappropriate registrationID are rejected. 4124 * If the namenode stops, and then restarts it can restore its 4125 * namespaceID and will continue serving the datanodes that has previously 4126 * registered with the namenode without restarting the whole cluster. 4127 * 4128 * @see org.apache.hadoop.hdfs.server.datanode.DataNode 4129 */ 4130 void registerDatanode(DatanodeRegistration nodeReg) throws IOException { 4131 writeLock(); 4132 try { 4133 getBlockManager().getDatanodeManager().registerDatanode(nodeReg); 4134 checkSafeMode(); 4135 } finally { 4136 writeUnlock(); 4137 } 4138 } 4139 4140 /** 4141 * Get registrationID for datanodes based on the namespaceID. 4142 * 4143 * @see #registerDatanode(DatanodeRegistration) 4144 * @return registration ID 4145 */ 4146 String getRegistrationID() { 4147 return Storage.getRegistrationID(dir.fsImage.getStorage()); 4148 } 4149 4150 /** 4151 * The given node has reported in. This method should: 4152 * 1) Record the heartbeat, so the datanode isn't timed out 4153 * 2) Adjust usage stats for future block allocation 4154 * 4155 * If a substantial amount of time passed since the last datanode 4156 * heartbeat then request an immediate block report. 4157 * 4158 * @return an array of datanode commands 4159 * @throws IOException 4160 */ 4161 HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, 4162 StorageReport[] reports, long cacheCapacity, long cacheUsed, 4163 int xceiverCount, int xmitsInProgress, int failedVolumes) 4164 throws IOException { 4165 readLock(); 4166 try { 4167 final int maxTransfer = blockManager.getMaxReplicationStreams() 4168 - xmitsInProgress; 4169 DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( 4170 nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed, 4171 xceiverCount, maxTransfer, failedVolumes); 4172 return new HeartbeatResponse(cmds, createHaStatusHeartbeat()); 4173 } finally { 4174 readUnlock(); 4175 } 4176 } 4177 4178 private NNHAStatusHeartbeat createHaStatusHeartbeat() { 4179 HAState state = haContext.getState(); 4180 return new NNHAStatusHeartbeat(state.getServiceState(), 4181 getFSImage().getLastAppliedOrWrittenTxId()); 4182 } 4183 4184 /** 4185 * Returns whether or not there were available resources at the last check of 4186 * resources. 4187 * 4188 * @return true if there were sufficient resources available, false otherwise. 4189 */ 4190 boolean nameNodeHasResourcesAvailable() { 4191 return hasResourcesAvailable; 4192 } 4193 4194 /** 4195 * Perform resource checks and cache the results. 
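 * <p>
 * Called periodically by {@code NameNodeResourceMonitor}; the polling period
 * is {@code resourceRecheckInterval}. Illustrative configuration sketch (the
 * key name is believed to correspond to
 * {@code DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY}; verify against
 * {@code DFSConfigKeys} before relying on it):
 * <pre>
 *   Configuration conf = new HdfsConfiguration();
 *   conf.setLong("dfs.namenode.resource.check.interval", 5000); // re-check every 5 seconds
 * </pre>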
4196 * @throws IOException 4197 */ 4198 void checkAvailableResources() { 4199 Preconditions.checkState(nnResourceChecker != null, 4200 "nnResourceChecker not initialized"); 4201 hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace(); 4202 } 4203 4204 /** 4205 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if 4206 * there are found to be insufficient resources available, causes the NN to 4207 * enter safe mode. If resources are later found to have returned to 4208 * acceptable levels, this daemon will cause the NN to exit safe mode. 4209 */ 4210 class NameNodeResourceMonitor implements Runnable { 4211 boolean shouldNNRmRun = true; 4212 @Override 4213 public void run () { 4214 try { 4215 while (fsRunning && shouldNNRmRun) { 4216 checkAvailableResources(); 4217 if(!nameNodeHasResourcesAvailable()) { 4218 String lowResourcesMsg = "NameNode low on available disk space. "; 4219 if (!isInSafeMode()) { 4220 FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode."); 4221 } else { 4222 FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode."); 4223 } 4224 enterSafeMode(true); 4225 } 4226 try { 4227 Thread.sleep(resourceRecheckInterval); 4228 } catch (InterruptedException ie) { 4229 // Deliberately ignore 4230 } 4231 } 4232 } catch (Exception e) { 4233 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e); 4234 } 4235 } 4236 4237 public void stopMonitor() { 4238 shouldNNRmRun = false; 4239 } 4240 } 4241 4242 class NameNodeEditLogRoller implements Runnable { 4243 4244 private boolean shouldRun = true; 4245 private final long rollThreshold; 4246 private final long sleepIntervalMs; 4247 4248 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) { 4249 this.rollThreshold = rollThreshold; 4250 this.sleepIntervalMs = sleepIntervalMs; 4251 } 4252 4253 @Override 4254 public void run() { 4255 while (fsRunning && shouldRun) { 4256 try { 4257 FSEditLog editLog = getFSImage().getEditLog(); 4258 long numEdits = 4259 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId(); 4260 if (numEdits > rollThreshold) { 4261 FSNamesystem.LOG.info("NameNode rolling its own edit log because" 4262 + " number of edits in open segment exceeds threshold of " 4263 + rollThreshold); 4264 rollEditLog(); 4265 } 4266 Thread.sleep(sleepIntervalMs); 4267 } catch (InterruptedException e) { 4268 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName() 4269 + " was interrupted, exiting"); 4270 break; 4271 } catch (Exception e) { 4272 FSNamesystem.LOG.error("Swallowing exception in " 4273 + NameNodeEditLogRoller.class.getSimpleName() + ":", e); 4274 } 4275 } 4276 } 4277 4278 public void stop() { 4279 shouldRun = false; 4280 } 4281 } 4282 4283 public FSImage getFSImage() { 4284 return dir.fsImage; 4285 } 4286 4287 public FSEditLog getEditLog() { 4288 return getFSImage().getEditLog(); 4289 } 4290 4291 private void checkBlock(ExtendedBlock block) throws IOException { 4292 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) { 4293 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId() 4294 + " - expected " + blockPoolId); 4295 } 4296 } 4297 4298 @Metric({"MissingBlocks", "Number of missing blocks"}) 4299 public long getMissingBlocksCount() { 4300 // not locking 4301 return blockManager.getMissingBlocksCount(); 4302 } 4303 4304 @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"}) 4305 public int getExpiredHeartbeats() { 4306 return datanodeStatistics.getExpiredHeartbeats(); 4307 } 4308 4309 
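  // Worked example for the two transaction metrics defined below (numbers are
  // illustrative): if the most recent checkpoint ends at txid 1000, the
  // current edit segment starts at txid 1501 and the last written txid is
  // 1800, then TransactionsSinceLastCheckpoint = 1800 - 1000 = 800 and
  // TransactionsSinceLastLogRoll = 1800 - 1501 + 1 = 300.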
@Metric({"TransactionsSinceLastCheckpoint", 4310 "Number of transactions since last checkpoint"}) 4311 public long getTransactionsSinceLastCheckpoint() { 4312 return getEditLog().getLastWrittenTxId() - 4313 getFSImage().getStorage().getMostRecentCheckpointTxId(); 4314 } 4315 4316 @Metric({"TransactionsSinceLastLogRoll", 4317 "Number of transactions since last edit log roll"}) 4318 public long getTransactionsSinceLastLogRoll() { 4319 if (isInStandbyState() || !getEditLog().isSegmentOpen()) { 4320 return 0; 4321 } else { 4322 return getEditLog().getLastWrittenTxId() - 4323 getEditLog().getCurSegmentTxId() + 1; 4324 } 4325 } 4326 4327 @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) 4328 public long getLastWrittenTransactionId() { 4329 return getEditLog().getLastWrittenTxId(); 4330 } 4331 4332 @Metric({"LastCheckpointTime", 4333 "Time in milliseconds since the epoch of the last checkpoint"}) 4334 public long getLastCheckpointTime() { 4335 return getFSImage().getStorage().getMostRecentCheckpointTime(); 4336 } 4337 4338 /** @see ClientProtocol#getStats() */ 4339 long[] getStats() { 4340 final long[] stats = datanodeStatistics.getStats(); 4341 stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks(); 4342 stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks(); 4343 stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount(); 4344 return stats; 4345 } 4346 4347 @Override // FSNamesystemMBean 4348 @Metric({"CapacityTotal", 4349 "Total raw capacity of data nodes in bytes"}) 4350 public long getCapacityTotal() { 4351 return datanodeStatistics.getCapacityTotal(); 4352 } 4353 4354 @Metric({"CapacityTotalGB", 4355 "Total raw capacity of data nodes in GB"}) 4356 public float getCapacityTotalGB() { 4357 return DFSUtil.roundBytesToGB(getCapacityTotal()); 4358 } 4359 4360 @Override // FSNamesystemMBean 4361 @Metric({"CapacityUsed", 4362 "Total used capacity across all data nodes in bytes"}) 4363 public long getCapacityUsed() { 4364 return datanodeStatistics.getCapacityUsed(); 4365 } 4366 4367 @Metric({"CapacityUsedGB", 4368 "Total used capacity across all data nodes in GB"}) 4369 public float getCapacityUsedGB() { 4370 return DFSUtil.roundBytesToGB(getCapacityUsed()); 4371 } 4372 4373 @Override // FSNamesystemMBean 4374 @Metric({"CapacityRemaining", "Remaining capacity in bytes"}) 4375 public long getCapacityRemaining() { 4376 return datanodeStatistics.getCapacityRemaining(); 4377 } 4378 4379 @Metric({"CapacityRemainingGB", "Remaining capacity in GB"}) 4380 public float getCapacityRemainingGB() { 4381 return DFSUtil.roundBytesToGB(getCapacityRemaining()); 4382 } 4383 4384 @Metric({"CapacityUsedNonDFS", 4385 "Total space used by data nodes for non DFS purposes in bytes"}) 4386 public long getCapacityUsedNonDFS() { 4387 return datanodeStatistics.getCapacityUsedNonDFS(); 4388 } 4389 4390 /** 4391 * Total number of connections. 
4392 */ 4393 @Override // FSNamesystemMBean 4394 @Metric 4395 public int getTotalLoad() { 4396 return datanodeStatistics.getXceiverCount(); 4397 } 4398 4399 @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" }) 4400 public int getNumSnapshottableDirs() { 4401 return this.snapshotManager.getNumSnapshottableDirs(); 4402 } 4403 4404 @Metric({ "Snapshots", "The number of snapshots" }) 4405 public int getNumSnapshots() { 4406 return this.snapshotManager.getNumSnapshots(); 4407 } 4408 4409 @Override 4410 public String getSnapshotStats() { 4411 Map<String, Object> info = new HashMap<String, Object>(); 4412 info.put("SnapshottableDirectories", this.getNumSnapshottableDirs()); 4413 info.put("Snapshots", this.getNumSnapshots()); 4414 return JSON.toString(info); 4415 } 4416 4417 int getNumberOfDatanodes(DatanodeReportType type) { 4418 readLock(); 4419 try { 4420 return getBlockManager().getDatanodeManager().getDatanodeListForReport( 4421 type).size(); 4422 } finally { 4423 readUnlock(); 4424 } 4425 } 4426 4427 DatanodeInfo[] datanodeReport(final DatanodeReportType type 4428 ) throws AccessControlException, StandbyException { 4429 checkSuperuserPrivilege(); 4430 checkOperation(OperationCategory.UNCHECKED); 4431 readLock(); 4432 try { 4433 checkOperation(OperationCategory.UNCHECKED); 4434 final DatanodeManager dm = getBlockManager().getDatanodeManager(); 4435 final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type); 4436 4437 DatanodeInfo[] arr = new DatanodeInfo[results.size()]; 4438 for (int i=0; i<arr.length; i++) { 4439 arr[i] = new DatanodeInfo(results.get(i)); 4440 } 4441 return arr; 4442 } finally { 4443 readUnlock(); 4444 } 4445 } 4446 4447 /** 4448 * Save namespace image. 4449 * This will save current namespace into fsimage file and empty edits file. 4450 * Requires superuser privilege and safe mode. 4451 * 4452 * @throws AccessControlException if superuser privilege is violated. 4453 * @throws IOException if 4454 */ 4455 void saveNamespace() throws AccessControlException, IOException { 4456 checkOperation(OperationCategory.UNCHECKED); 4457 checkSuperuserPrivilege(); 4458 4459 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 4460 if (cacheEntry != null && cacheEntry.isSuccess()) { 4461 return; // Return previous response 4462 } 4463 boolean success = false; 4464 readLock(); 4465 try { 4466 checkOperation(OperationCategory.UNCHECKED); 4467 if (!isInSafeMode()) { 4468 throw new IOException("Safe mode should be turned ON " 4469 + "in order to create namespace image."); 4470 } 4471 getFSImage().saveNamespace(this); 4472 success = true; 4473 } finally { 4474 readUnlock(); 4475 RetryCache.setState(cacheEntry, success); 4476 } 4477 LOG.info("New namespace image has been created"); 4478 } 4479 4480 /** 4481 * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again. 4482 * Requires superuser privilege. 4483 * 4484 * @throws AccessControlException if superuser privilege is violated. 4485 */ 4486 boolean restoreFailedStorage(String arg) throws AccessControlException, 4487 StandbyException { 4488 checkSuperuserPrivilege(); 4489 checkOperation(OperationCategory.UNCHECKED); 4490 writeLock(); 4491 try { 4492 checkOperation(OperationCategory.UNCHECKED); 4493 4494 // if it is disabled - enable it and vice versa. 
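      // Accepted values for 'arg' (driven by the checks below):
      //   "check" -> report the current setting without changing it
      //   "true"  -> enable restoring failed storage directories
      //   anything else -> disable it (treated as "false")
      // Illustrative admin usage (hedged; consult the dfsadmin documentation
      // for your release):
      //   hdfs dfsadmin -restoreFailedStorage check
      //   hdfs dfsadmin -restoreFailedStorage true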
4495 if(arg.equals("check")) 4496 return getFSImage().getStorage().getRestoreFailedStorage(); 4497 4498 boolean val = arg.equals("true"); // false if not 4499 getFSImage().getStorage().setRestoreFailedStorage(val); 4500 4501 return val; 4502 } finally { 4503 writeUnlock(); 4504 } 4505 } 4506 4507 Date getStartTime() { 4508 return new Date(startTime); 4509 } 4510 4511 void finalizeUpgrade() throws IOException { 4512 checkSuperuserPrivilege(); 4513 checkOperation(OperationCategory.WRITE); 4514 writeLock(); 4515 try { 4516 checkOperation(OperationCategory.WRITE); 4517 getFSImage().finalizeUpgrade(); 4518 } finally { 4519 writeUnlock(); 4520 } 4521 } 4522 4523 void refreshNodes() throws IOException { 4524 checkOperation(OperationCategory.UNCHECKED); 4525 checkSuperuserPrivilege(); 4526 getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration()); 4527 } 4528 4529 void setBalancerBandwidth(long bandwidth) throws IOException { 4530 checkOperation(OperationCategory.UNCHECKED); 4531 checkSuperuserPrivilege(); 4532 getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); 4533 } 4534 4535 /** 4536 * SafeModeInfo contains information related to the safe mode. 4537 * <p> 4538 * An instance of {@link SafeModeInfo} is created when the name node 4539 * enters safe mode. 4540 * <p> 4541 * During name node startup {@link SafeModeInfo} counts the number of 4542 * <em>safe blocks</em>, those that have at least the minimal number of 4543 * replicas, and calculates the ratio of safe blocks to the total number 4544 * of blocks in the system, which is the size of blocks in 4545 * {@link FSNamesystem#blockManager}. When the ratio reaches the 4546 * {@link #threshold} it starts the SafeModeMonitor daemon in order 4547 * to monitor whether the safe mode {@link #extension} is passed. 4548 * Then it leaves safe mode and destroys itself. 4549 * <p> 4550 * If safe mode is turned on manually then the number of safe blocks is 4551 * not tracked because the name node is not intended to leave safe mode 4552 * automatically in the case. 4553 * 4554 * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean) 4555 */ 4556 public class SafeModeInfo { 4557 // configuration fields 4558 /** Safe mode threshold condition %.*/ 4559 private double threshold; 4560 /** Safe mode minimum number of datanodes alive */ 4561 private int datanodeThreshold; 4562 /** Safe mode extension after the threshold. */ 4563 private int extension; 4564 /** Min replication required by safe mode. */ 4565 private int safeReplication; 4566 /** threshold for populating needed replication queues */ 4567 private double replQueueThreshold; 4568 4569 // internal fields 4570 /** Time when threshold was reached. 4571 * <br> -1 safe mode is off 4572 * <br> 0 safe mode is on, and threshold is not reached yet 4573 * <br> >0 safe mode is on, but we are in extension period 4574 */ 4575 private long reached = -1; 4576 /** Total number of blocks. */ 4577 int blockTotal; 4578 /** Number of safe blocks. */ 4579 int blockSafe; 4580 /** Number of blocks needed to satisfy safe mode threshold condition */ 4581 private int blockThreshold; 4582 /** Number of blocks needed before populating replication queues */ 4583 private int blockReplQueueThreshold; 4584 /** time of the last status printout */ 4585 private long lastStatusReport = 0; 4586 /** flag indicating whether replication queues have been initialized */ 4587 boolean initializedReplQueues = false; 4588 /** Was safemode entered automatically because available resources were low. 
*/ 4589 private boolean resourcesLow = false; 4590 /** Should safemode adjust its block totals as blocks come in */ 4591 private boolean shouldIncrementallyTrackBlocks = false; 4592 /** counter for tracking startup progress of reported blocks */ 4593 private Counter awaitingReportedBlocksCounter; 4594 4595 /** 4596 * Creates SafeModeInfo when the name node enters 4597 * automatic safe mode at startup. 4598 * 4599 * @param conf configuration 4600 */ 4601 private SafeModeInfo(Configuration conf) { 4602 this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, 4603 DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT); 4604 if(threshold > 1.0) { 4605 LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold); 4606 } 4607 this.datanodeThreshold = conf.getInt( 4608 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 4609 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT); 4610 this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); 4611 this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 4612 DFS_NAMENODE_REPLICATION_MIN_DEFAULT); 4613 4614 LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold); 4615 LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold); 4616 LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension); 4617 4618 // default to safe mode threshold (i.e., don't populate queues before leaving safe mode) 4619 this.replQueueThreshold = 4620 conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 4621 (float) threshold); 4622 this.blockTotal = 0; 4623 this.blockSafe = 0; 4624 } 4625 4626 /** 4627 * In the HA case, the StandbyNode can be in safemode while the namespace 4628 * is modified by the edit log tailer. In this case, the number of total 4629 * blocks changes as edits are processed (eg blocks are added and deleted). 4630 * However, we don't want to do the incremental tracking during the 4631 * startup-time loading process -- only once the initial total has been 4632 * set after the image has been loaded. 4633 */ 4634 private boolean shouldIncrementallyTrackBlocks() { 4635 return shouldIncrementallyTrackBlocks; 4636 } 4637 4638 /** 4639 * Creates SafeModeInfo when safe mode is entered manually, or because 4640 * available resources are low. 4641 * 4642 * The {@link #threshold} is set to 1.5 so that it could never be reached. 4643 * {@link #blockTotal} is set to -1 to indicate that safe mode is manual. 4644 * 4645 * @see SafeModeInfo 4646 */ 4647 private SafeModeInfo(boolean resourcesLow, boolean isReplQueuesInited) { 4648 this.threshold = 1.5f; // this threshold can never be reached 4649 this.datanodeThreshold = Integer.MAX_VALUE; 4650 this.extension = Integer.MAX_VALUE; 4651 this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication 4652 this.replQueueThreshold = 1.5f; // can never be reached 4653 this.blockTotal = -1; 4654 this.blockSafe = -1; 4655 this.resourcesLow = resourcesLow; 4656 this.initializedReplQueues = isReplQueuesInited; 4657 enter(); 4658 reportStatus("STATE* Safe mode is ON.", true); 4659 } 4660 4661 /** 4662 * Check if safe mode is on. 4663 * @return true if in safe mode 4664 */ 4665 private synchronized boolean isOn() { 4666 doConsistencyCheck(); 4667 return this.reached >= 0; 4668 } 4669 4670 /** 4671 * Check if we are populating replication queues. 4672 */ 4673 private synchronized boolean isPopulatingReplQueues() { 4674 return initializedReplQueues; 4675 } 4676 4677 /** 4678 * Enter safe mode. 
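 * <p>
 * Marks safe mode as ON by setting {@link #reached} to 0, i.e. safe mode is
 * on but the threshold has not been reached yet.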
4679 */ 4680 private void enter() { 4681 this.reached = 0; 4682 } 4683 4684 /** 4685 * Leave safe mode. 4686 * <p> 4687 * Check for invalid, under- & over-replicated blocks in the end of startup. 4688 */ 4689 private synchronized void leave() { 4690 // if not done yet, initialize replication queues. 4691 // In the standby, do not populate repl queues 4692 if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) { 4693 initializeReplQueues(); 4694 } 4695 long timeInSafemode = now() - startTime; 4696 NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 4697 + timeInSafemode/1000 + " secs"); 4698 NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode); 4699 4700 //Log the following only once (when transitioning from ON -> OFF) 4701 if (reached >= 0) { 4702 NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 4703 } 4704 reached = -1; 4705 safeMode = null; 4706 final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology(); 4707 NameNode.stateChangeLog.info("STATE* Network topology has " 4708 + nt.getNumOfRacks() + " racks and " 4709 + nt.getNumOfLeaves() + " datanodes"); 4710 NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has " 4711 + blockManager.numOfUnderReplicatedBlocks() + " blocks"); 4712 4713 startSecretManagerIfNecessary(); 4714 4715 // If startup has not yet completed, end safemode phase. 4716 StartupProgress prog = NameNode.getStartupProgress(); 4717 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) { 4718 prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS); 4719 prog.endPhase(Phase.SAFEMODE); 4720 } 4721 } 4722 4723 /** 4724 * Initialize replication queues. 4725 */ 4726 private synchronized void initializeReplQueues() { 4727 LOG.info("initializing replication queues"); 4728 assert !isPopulatingReplQueues() : "Already initialized repl queues"; 4729 long startTimeMisReplicatedScan = now(); 4730 blockManager.processMisReplicatedBlocks(); 4731 initializedReplQueues = true; 4732 NameNode.stateChangeLog.info("STATE* Replication Queue initialization " 4733 + "scan for invalid, over- and under-replicated blocks " 4734 + "completed in " + (now() - startTimeMisReplicatedScan) 4735 + " msec"); 4736 } 4737 4738 /** 4739 * Check whether we have reached the threshold for 4740 * initializing replication queues. 4741 */ 4742 private synchronized boolean canInitializeReplQueues() { 4743 return shouldPopulateReplQueues() 4744 && blockSafe >= blockReplQueueThreshold; 4745 } 4746 4747 /** 4748 * Safe mode can be turned off iff 4749 * the threshold is reached and 4750 * the extension time have passed. 4751 * @return true if can leave or false otherwise. 4752 */ 4753 private synchronized boolean canLeave() { 4754 if (reached == 0) 4755 return false; 4756 if (now() - reached < extension) { 4757 reportStatus("STATE* Safe mode ON.", false); 4758 return false; 4759 } 4760 return !needEnter(); 4761 } 4762 4763 /** 4764 * There is no need to enter safe mode 4765 * if DFS is empty or {@link #threshold} == 0 4766 */ 4767 private boolean needEnter() { 4768 return (threshold != 0 && blockSafe < blockThreshold) || 4769 (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) || 4770 (!nameNodeHasResourcesAvailable()); 4771 } 4772 4773 /** 4774 * Check and trigger safe mode if needed. 
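 * <p>
 * Must be called with the FSNamesystem write lock held, because leaving
 * safe mode initializes the replication queues.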
4775 */ 4776 private void checkMode() { 4777 // Have to have write-lock since leaving safemode initializes 4778 // repl queues, which requires write lock 4779 assert hasWriteLock(); 4780 // if smmthread is already running, the block threshold must have been 4781 // reached before, there is no need to enter the safe mode again 4782 if (smmthread == null && needEnter()) { 4783 enter(); 4784 // check if we are ready to initialize replication queues 4785 if (canInitializeReplQueues() && !isPopulatingReplQueues()) { 4786 initializeReplQueues(); 4787 } 4788 reportStatus("STATE* Safe mode ON.", false); 4789 return; 4790 } 4791 // the threshold is reached or was reached before 4792 if (!isOn() || // safe mode is off 4793 extension <= 0 || threshold <= 0) { // don't need to wait 4794 this.leave(); // leave safe mode 4795 return; 4796 } 4797 if (reached > 0) { // threshold has already been reached before 4798 reportStatus("STATE* Safe mode ON.", false); 4799 return; 4800 } 4801 // start monitor 4802 reached = now(); 4803 if (smmthread == null) { 4804 smmthread = new Daemon(new SafeModeMonitor()); 4805 smmthread.start(); 4806 reportStatus("STATE* Safe mode extension entered.", true); 4807 } 4808 4809 // check if we are ready to initialize replication queues 4810 if (canInitializeReplQueues() && !isPopulatingReplQueues()) { 4811 initializeReplQueues(); 4812 } 4813 } 4814 4815 /** 4816 * Set total number of blocks. 4817 */ 4818 private synchronized void setBlockTotal(int total) { 4819 this.blockTotal = total; 4820 this.blockThreshold = (int) (blockTotal * threshold); 4821 this.blockReplQueueThreshold = 4822 (int) (blockTotal * replQueueThreshold); 4823 if (haEnabled) { 4824 // After we initialize the block count, any further namespace 4825 // modifications done while in safe mode need to keep track 4826 // of the number of total blocks in the system. 4827 this.shouldIncrementallyTrackBlocks = true; 4828 } 4829 if(blockSafe < 0) 4830 this.blockSafe = 0; 4831 checkMode(); 4832 } 4833 4834 /** 4835 * Increment number of safe blocks if current block has 4836 * reached minimal replication. 4837 * @param replication current replication 4838 */ 4839 private synchronized void incrementSafeBlockCount(short replication) { 4840 if (replication == safeReplication) { 4841 this.blockSafe++; 4842 4843 // Report startup progress only if we haven't completed startup yet. 4844 StartupProgress prog = NameNode.getStartupProgress(); 4845 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) { 4846 if (this.awaitingReportedBlocksCounter == null) { 4847 this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE, 4848 STEP_AWAITING_REPORTED_BLOCKS); 4849 } 4850 this.awaitingReportedBlocksCounter.increment(); 4851 } 4852 4853 checkMode(); 4854 } 4855 } 4856 4857 /** 4858 * Decrement number of safe blocks if current block has 4859 * fallen below minimal replication. 4860 * @param replication current replication 4861 */ 4862 private synchronized void decrementSafeBlockCount(short replication) { 4863 if (replication == safeReplication-1) { 4864 this.blockSafe--; 4865 //blockSafe is set to -1 in manual / low resources safemode 4866 assert blockSafe >= 0 || isManual() || areResourcesLow(); 4867 checkMode(); 4868 } 4869 } 4870 4871 /** 4872 * Check if safe mode was entered manually 4873 */ 4874 private boolean isManual() { 4875 return extension == Integer.MAX_VALUE; 4876 } 4877 4878 /** 4879 * Set manual safe mode. 
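 * <p>
 * Manual safe mode is encoded by setting {@link #extension} to
 * {@code Integer.MAX_VALUE}, which is also how {@link #isManual()} detects it.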
4880 */ 4881 private synchronized void setManual() { 4882 extension = Integer.MAX_VALUE; 4883 } 4884 4885 /** 4886 * Check if safe mode was entered due to resources being low. 4887 */ 4888 private boolean areResourcesLow() { 4889 return resourcesLow; 4890 } 4891 4892 /** 4893 * Set that resources are low for this instance of safe mode. 4894 */ 4895 private void setResourcesLow() { 4896 resourcesLow = true; 4897 } 4898 4899 /** 4900 * A tip on how safe mode is to be turned off: manually or automatically. 4901 */ 4902 String getTurnOffTip() { 4903 if(!isOn()) 4904 return "Safe mode is OFF."; 4905 4906 //Manual OR low-resource safemode. (Admin intervention required) 4907 String leaveMsg = "It was turned on manually. "; 4908 if (areResourcesLow()) { 4909 leaveMsg = "Resources are low on NN. Please add or free up more " 4910 + "resources then turn off safe mode manually. NOTE: If you turn off" 4911 + " safe mode before adding resources, " 4912 + "the NN will immediately return to safe mode. "; 4913 } 4914 if (isManual() || areResourcesLow()) { 4915 return leaveMsg 4916 + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off."; 4917 } 4918 4919 //Automatic safemode. System will come out of safemode automatically. 4920 leaveMsg = "Safe mode will be turned off automatically"; 4921 int numLive = getNumLiveDataNodes(); 4922 String msg = ""; 4923 if (reached == 0) { 4924 if (blockSafe < blockThreshold) { 4925 msg += String.format( 4926 "The reported blocks %d needs additional %d" 4927 + " blocks to reach the threshold %.4f of total blocks %d.\n", 4928 blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); 4929 } 4930 if (numLive < datanodeThreshold) { 4931 msg += String.format( 4932 "The number of live datanodes %d needs an additional %d live " 4933 + "datanodes to reach the minimum number %d.\n", 4934 numLive, (datanodeThreshold - numLive), datanodeThreshold); 4935 } 4936 } else { 4937 msg = String.format("The reported blocks %d has reached the threshold" 4938 + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal); 4939 4940 msg += String.format("The number of live datanodes %d has reached " 4941 + "the minimum number %d. ", 4942 numLive, datanodeThreshold); 4943 } 4944 msg += leaveMsg; 4945 // threshold is not reached or manual or resources low 4946 if(reached == 0 || (isManual() && !areResourcesLow())) { 4947 return msg; 4948 } 4949 // extension period is in progress 4950 return msg + (reached + extension - now() > 0 ? 4951 " in " + (reached + extension - now()) / 1000 + " seconds." 4952 : " soon."); 4953 } 4954 4955 /** 4956 * Print status every 20 seconds. 4957 */ 4958 private void reportStatus(String msg, boolean rightNow) { 4959 long curTime = now(); 4960 if(!rightNow && (curTime - lastStatusReport < 20 * 1000)) 4961 return; 4962 NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip()); 4963 lastStatusReport = curTime; 4964 } 4965 4966 @Override 4967 public String toString() { 4968 String resText = "Current safe blocks = " 4969 + blockSafe 4970 + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold 4971 + ". Minimal replication = " + safeReplication + "."; 4972 if (reached > 0) 4973 resText += " Threshold was reached " + new Date(reached) + "."; 4974 return resText; 4975 } 4976 4977 /** 4978 * Checks consistency of the class state. 4979 * This is costly so only runs if asserts are enabled. 
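 * <p>
 * Whether asserts are enabled is detected via an assignment inside an
 * {@code assert} statement, so the check is skipped entirely when the JVM
 * runs without {@code -ea}.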
4980 */ 4981 private void doConsistencyCheck() { 4982 boolean assertsOn = false; 4983 assert assertsOn = true; // set to true if asserts are on 4984 if (!assertsOn) return; 4985 4986 if (blockTotal == -1 && blockSafe == -1) { 4987 return; // manual safe mode 4988 } 4989 int activeBlocks = blockManager.getActiveBlockCount(); 4990 if ((blockTotal != activeBlocks) && 4991 !(blockSafe >= 0 && blockSafe <= blockTotal)) { 4992 throw new AssertionError( 4993 " SafeMode: Inconsistent filesystem state: " 4994 + "SafeMode data: blockTotal=" + blockTotal 4995 + " blockSafe=" + blockSafe + "; " 4996 + "BlockManager data: active=" + activeBlocks); 4997 } 4998 } 4999 5000 private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) { 5001 if (!shouldIncrementallyTrackBlocks) { 5002 return; 5003 } 5004 assert haEnabled; 5005 5006 if (LOG.isDebugEnabled()) { 5007 LOG.debug("Adjusting block totals from " + 5008 blockSafe + "/" + blockTotal + " to " + 5009 (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal)); 5010 } 5011 assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " + 5012 blockSafe + " by " + deltaSafe + ": would be negative"; 5013 assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " + 5014 blockTotal + " by " + deltaTotal + ": would be negative"; 5015 5016 blockSafe += deltaSafe; 5017 setBlockTotal(blockTotal + deltaTotal); 5018 } 5019 } 5020 5021 /** 5022 * Periodically check whether it is time to leave safe mode. 5023 * This thread starts when the threshold level is reached. 5024 * 5025 */ 5026 class SafeModeMonitor implements Runnable { 5027 /** interval in msec for checking safe mode: {@value} */ 5028 private static final long recheckInterval = 1000; 5029 5030 /** 5031 */ 5032 @Override 5033 public void run() { 5034 while (fsRunning) { 5035 writeLock(); 5036 try { 5037 if (safeMode == null) { // Not in safe mode. 5038 break; 5039 } 5040 if (safeMode.canLeave()) { 5041 // Leave safe mode. 
5042 safeMode.leave(); 5043 smmthread = null; 5044 break; 5045 } 5046 } finally { 5047 writeUnlock(); 5048 } 5049 5050 try { 5051 Thread.sleep(recheckInterval); 5052 } catch (InterruptedException ie) { 5053 // Ignored 5054 } 5055 } 5056 if (!fsRunning) { 5057 LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread"); 5058 } 5059 } 5060 } 5061 5062 boolean setSafeMode(SafeModeAction action) throws IOException { 5063 if (action != SafeModeAction.SAFEMODE_GET) { 5064 checkSuperuserPrivilege(); 5065 switch(action) { 5066 case SAFEMODE_LEAVE: // leave safe mode 5067 leaveSafeMode(); 5068 break; 5069 case SAFEMODE_ENTER: // enter safe mode 5070 enterSafeMode(false); 5071 break; 5072 default: 5073 LOG.error("Unexpected safe mode action"); 5074 } 5075 } 5076 return isInSafeMode(); 5077 } 5078 5079 @Override 5080 public void checkSafeMode() { 5081 // safeMode is volatile, and may be set to null at any time 5082 SafeModeInfo safeMode = this.safeMode; 5083 if (safeMode != null) { 5084 safeMode.checkMode(); 5085 } 5086 } 5087 5088 @Override 5089 public boolean isInSafeMode() { 5090 // safeMode is volatile, and may be set to null at any time 5091 SafeModeInfo safeMode = this.safeMode; 5092 if (safeMode == null) 5093 return false; 5094 return safeMode.isOn(); 5095 } 5096 5097 @Override 5098 public boolean isInStartupSafeMode() { 5099 // safeMode is volatile, and may be set to null at any time 5100 SafeModeInfo safeMode = this.safeMode; 5101 if (safeMode == null) 5102 return false; 5103 // If the NN is in safemode, and not due to manual / low resources, we 5104 // assume it must be because of startup. If the NN had low resources during 5105 // startup, we assume it came out of startup safemode and it is now in low 5106 // resources safemode 5107 return !safeMode.isManual() && !safeMode.areResourcesLow() 5108 && safeMode.isOn(); 5109 } 5110 5111 /** 5112 * Check if replication queues are to be populated 5113 * @return true when node is HAState.Active and not in the very first safemode 5114 */ 5115 @Override 5116 public boolean isPopulatingReplQueues() { 5117 if (!shouldPopulateReplQueues()) { 5118 return false; 5119 } 5120 // safeMode is volatile, and may be set to null at any time 5121 SafeModeInfo safeMode = this.safeMode; 5122 if (safeMode == null) 5123 return true; 5124 return safeMode.isPopulatingReplQueues(); 5125 } 5126 5127 private boolean shouldPopulateReplQueues() { 5128 if(haContext == null || haContext.getState() == null) 5129 return false; 5130 return haContext.getState().shouldPopulateReplQueues(); 5131 } 5132 5133 @Override 5134 public void incrementSafeBlockCount(int replication) { 5135 // safeMode is volatile, and may be set to null at any time 5136 SafeModeInfo safeMode = this.safeMode; 5137 if (safeMode == null) 5138 return; 5139 safeMode.incrementSafeBlockCount((short)replication); 5140 } 5141 5142 @Override 5143 public void decrementSafeBlockCount(Block b) { 5144 // safeMode is volatile, and may be set to null at any time 5145 SafeModeInfo safeMode = this.safeMode; 5146 if (safeMode == null) // mostly true 5147 return; 5148 BlockInfo storedBlock = getStoredBlock(b); 5149 if (storedBlock.isComplete()) { 5150 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas()); 5151 } 5152 } 5153 5154 /** 5155 * Adjust the total number of blocks safe and expected during safe mode. 5156 * If safe mode is not currently on, this is a no-op. 
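 * <p>
 * For example (illustrative numbers only): if the standby tails an edit
 * that deletes a file whose two complete blocks were already counted as
 * safe, the caller would pass {@code deltaSafe = -2, deltaTotal = -2}.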
5157 * @param deltaSafe the change in number of safe blocks 5158 * @param deltaTotal the change in number of total blocks expected 5159 */ 5160 @Override 5161 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) { 5162 // safeMode is volatile, and may be set to null at any time 5163 SafeModeInfo safeMode = this.safeMode; 5164 if (safeMode == null) 5165 return; 5166 safeMode.adjustBlockTotals(deltaSafe, deltaTotal); 5167 } 5168 5169 /** 5170 * Set the total number of blocks in the system. 5171 */ 5172 public void setBlockTotal() { 5173 // safeMode is volatile, and may be set to null at any time 5174 SafeModeInfo safeMode = this.safeMode; 5175 if (safeMode == null) 5176 return; 5177 safeMode.setBlockTotal((int)getCompleteBlocksTotal()); 5178 } 5179 5180 /** 5181 * Get the total number of blocks in the system. 5182 */ 5183 @Override // FSNamesystemMBean 5184 @Metric 5185 public long getBlocksTotal() { 5186 return blockManager.getTotalBlocks(); 5187 } 5188 5189 /** 5190 * Get the total number of COMPLETE blocks in the system. 5191 * For safe mode only complete blocks are counted. 5192 */ 5193 private long getCompleteBlocksTotal() { 5194 // Calculate number of blocks under construction 5195 long numUCBlocks = 0; 5196 readLock(); 5197 try { 5198 for (Lease lease : leaseManager.getSortedLeases()) { 5199 for (String path : lease.getPaths()) { 5200 final INodeFileUnderConstruction cons; 5201 try { 5202 cons = INodeFileUnderConstruction.valueOf(dir.getINode(path), path); 5203 } catch (UnresolvedLinkException e) { 5204 throw new AssertionError("Lease files should reside on this FS"); 5205 } catch (IOException e) { 5206 throw new RuntimeException(e); 5207 } 5208 BlockInfo[] blocks = cons.getBlocks(); 5209 if(blocks == null) 5210 continue; 5211 for(BlockInfo b : blocks) { 5212 if(!b.isComplete()) 5213 numUCBlocks++; 5214 } 5215 } 5216 } 5217 LOG.info("Number of blocks under construction: " + numUCBlocks); 5218 return getBlocksTotal() - numUCBlocks; 5219 } finally { 5220 readUnlock(); 5221 } 5222 } 5223 5224 /** 5225 * Enter safe mode. If resourcesLow is false, then we assume safe mode was entered manually. 5226 * @throws IOException 5227 */ 5228 void enterSafeMode(boolean resourcesLow) throws IOException { 5229 writeLock(); 5230 try { 5231 // Stop the secret manager, since rolling the master key would 5232 // try to write to the edit log 5233 stopSecretManager(); 5234 5235 // Ensure that any concurrent operations have been fully synced 5236 // before entering safe mode. This ensures that the FSImage 5237 // is entirely stable on disk as soon as we're in safe mode. 5238 boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite(); 5239 // Before the Editlog is in OpenForWrite mode, editLogStream will be null, so 5240 // logSyncAll can be called only when the Editlog is in OpenForWrite mode 5241 if (isEditlogOpenForWrite) { 5242 getEditLog().logSyncAll(); 5243 } 5244 if (!isInSafeMode()) { 5245 safeMode = new SafeModeInfo(resourcesLow, isPopulatingReplQueues()); 5246 return; 5247 } 5248 if (resourcesLow) { 5249 safeMode.setResourcesLow(); 5250 } else { 5251 safeMode.setManual(); 5252 } 5253 if (isEditlogOpenForWrite) { 5254 getEditLog().logSyncAll(); 5255 } 5256 NameNode.stateChangeLog.info("STATE* Safe mode is ON. " 5257 + safeMode.getTurnOffTip()); 5258 } finally { 5259 writeUnlock(); 5260 } 5261 } 5262 5263 /** 5264 * Leave safe mode.
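 * <p>
 * Typically triggered by an administrator running
 * {@code hdfs dfsadmin -safemode leave}.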
5265 * @throws IOException 5266 */ 5267 void leaveSafeMode() { 5268 writeLock(); 5269 try { 5270 if (!isInSafeMode()) { 5271 NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 5272 return; 5273 } 5274 safeMode.leave(); 5275 } finally { 5276 writeUnlock(); 5277 } 5278 } 5279 5280 String getSafeModeTip() { 5281 readLock(); 5282 try { 5283 if (!isInSafeMode()) { 5284 return ""; 5285 } 5286 return safeMode.getTurnOffTip(); 5287 } finally { 5288 readUnlock(); 5289 } 5290 } 5291 5292 CheckpointSignature rollEditLog() throws IOException { 5293 checkSuperuserPrivilege(); 5294 checkOperation(OperationCategory.JOURNAL); 5295 writeLock(); 5296 try { 5297 checkOperation(OperationCategory.JOURNAL); 5298 checkNameNodeSafeMode("Log not rolled"); 5299 if (Server.isRpcInvocation()) { 5300 LOG.info("Roll Edit Log from " + Server.getRemoteAddress()); 5301 } 5302 return getFSImage().rollEditLog(); 5303 } finally { 5304 writeUnlock(); 5305 } 5306 } 5307 5308 NamenodeCommand startCheckpoint(NamenodeRegistration backupNode, 5309 NamenodeRegistration activeNamenode) throws IOException { 5310 checkOperation(OperationCategory.CHECKPOINT); 5311 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, 5312 null); 5313 if (cacheEntry != null && cacheEntry.isSuccess()) { 5314 return (NamenodeCommand) cacheEntry.getPayload(); 5315 } 5316 writeLock(); 5317 NamenodeCommand cmd = null; 5318 try { 5319 checkOperation(OperationCategory.CHECKPOINT); 5320 5321 checkNameNodeSafeMode("Checkpoint not started"); 5322 LOG.info("Start checkpoint for " + backupNode.getAddress()); 5323 cmd = getFSImage().startCheckpoint(backupNode, activeNamenode); 5324 getEditLog().logSync(); 5325 return cmd; 5326 } finally { 5327 writeUnlock(); 5328 RetryCache.setState(cacheEntry, cmd != null, cmd); 5329 } 5330 } 5331 5332 public void processIncrementalBlockReport(final DatanodeID nodeID, 5333 final String poolId, final StorageReceivedDeletedBlocks srdb) 5334 throws IOException { 5335 writeLock(); 5336 try { 5337 blockManager.processIncrementalBlockReport(nodeID, poolId, srdb); 5338 } finally { 5339 writeUnlock(); 5340 } 5341 } 5342 5343 void endCheckpoint(NamenodeRegistration registration, 5344 CheckpointSignature sig) throws IOException { 5345 checkOperation(OperationCategory.CHECKPOINT); 5346 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 5347 if (cacheEntry != null && cacheEntry.isSuccess()) { 5348 return; // Return previous response 5349 } 5350 boolean success = false; 5351 readLock(); 5352 try { 5353 checkOperation(OperationCategory.CHECKPOINT); 5354 5355 checkNameNodeSafeMode("Checkpoint not ended"); 5356 LOG.info("End checkpoint for " + registration.getAddress()); 5357 getFSImage().endCheckpoint(sig); 5358 success = true; 5359 } finally { 5360 readUnlock(); 5361 RetryCache.setState(cacheEntry, success); 5362 } 5363 } 5364 5365 PermissionStatus createFsOwnerPermissions(FsPermission permission) { 5366 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission); 5367 } 5368 5369 private void checkOwner(FSPermissionChecker pc, String path) 5370 throws AccessControlException, UnresolvedLinkException { 5371 checkPermission(pc, path, true, null, null, null, null); 5372 } 5373 5374 private void checkPathAccess(FSPermissionChecker pc, 5375 String path, FsAction access) throws AccessControlException, 5376 UnresolvedLinkException { 5377 checkPermission(pc, path, false, null, null, access, null); 5378 } 5379 5380 private void checkParentAccess(FSPermissionChecker pc, 5381 String 
path, FsAction access) throws AccessControlException, 5382 UnresolvedLinkException { 5383 checkPermission(pc, path, false, null, access, null, null); 5384 } 5385 5386 private void checkAncestorAccess(FSPermissionChecker pc, 5387 String path, FsAction access) throws AccessControlException, 5388 UnresolvedLinkException { 5389 checkPermission(pc, path, false, access, null, null, null); 5390 } 5391 5392 private void checkTraverse(FSPermissionChecker pc, String path) 5393 throws AccessControlException, UnresolvedLinkException { 5394 checkPermission(pc, path, false, null, null, null, null); 5395 } 5396 5397 @Override 5398 public void checkSuperuserPrivilege() 5399 throws AccessControlException { 5400 if (isPermissionEnabled) { 5401 FSPermissionChecker pc = getPermissionChecker(); 5402 pc.checkSuperuserPrivilege(); 5403 } 5404 } 5405 5406 /** 5407 * Check whether current user have permissions to access the path. For more 5408 * details of the parameters, see 5409 * {@link FSPermissionChecker#checkPermission()}. 5410 */ 5411 private void checkPermission(FSPermissionChecker pc, 5412 String path, boolean doCheckOwner, FsAction ancestorAccess, 5413 FsAction parentAccess, FsAction access, FsAction subAccess) 5414 throws AccessControlException, UnresolvedLinkException { 5415 checkPermission(pc, path, doCheckOwner, ancestorAccess, 5416 parentAccess, access, subAccess, true); 5417 } 5418 5419 /** 5420 * Check whether current user have permissions to access the path. For more 5421 * details of the parameters, see 5422 * {@link FSPermissionChecker#checkPermission()}. 5423 */ 5424 private void checkPermission(FSPermissionChecker pc, 5425 String path, boolean doCheckOwner, FsAction ancestorAccess, 5426 FsAction parentAccess, FsAction access, FsAction subAccess, 5427 boolean resolveLink) 5428 throws AccessControlException, UnresolvedLinkException { 5429 if (!pc.isSuperUser()) { 5430 dir.waitForReady(); 5431 readLock(); 5432 try { 5433 pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess, 5434 parentAccess, access, subAccess, resolveLink); 5435 } finally { 5436 readUnlock(); 5437 } 5438 } 5439 } 5440 5441 /** 5442 * Check to see if we have exceeded the limit on the number 5443 * of inodes. 5444 */ 5445 void checkFsObjectLimit() throws IOException { 5446 if (maxFsObjects != 0 && 5447 maxFsObjects <= dir.totalInodes() + getBlocksTotal()) { 5448 throw new IOException("Exceeded the configured number of objects " + 5449 maxFsObjects + " in the filesystem."); 5450 } 5451 } 5452 5453 /** 5454 * Get the total number of objects in the system. 
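 * More precisely, this returns the configured object limit
 * ({@code DFS_NAMENODE_MAX_OBJECTS_KEY}); a value of 0 disables the check
 * performed in {@link #checkFsObjectLimit()}.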
5455 */ 5456 @Override // FSNamesystemMBean 5457 public long getMaxObjects() { 5458 return maxFsObjects; 5459 } 5460 5461 @Override // FSNamesystemMBean 5462 @Metric 5463 public long getFilesTotal() { 5464 readLock(); 5465 try { 5466 return this.dir.totalInodes(); 5467 } finally { 5468 readUnlock(); 5469 } 5470 } 5471 5472 @Override // FSNamesystemMBean 5473 @Metric 5474 public long getPendingReplicationBlocks() { 5475 return blockManager.getPendingReplicationBlocksCount(); 5476 } 5477 5478 @Override // FSNamesystemMBean 5479 @Metric 5480 public long getUnderReplicatedBlocks() { 5481 return blockManager.getUnderReplicatedBlocksCount(); 5482 } 5483 5484 /** Returns number of blocks with corrupt replicas */ 5485 @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"}) 5486 public long getCorruptReplicaBlocks() { 5487 return blockManager.getCorruptReplicaBlocksCount(); 5488 } 5489 5490 @Override // FSNamesystemMBean 5491 @Metric 5492 public long getScheduledReplicationBlocks() { 5493 return blockManager.getScheduledReplicationBlocksCount(); 5494 } 5495 5496 @Metric 5497 public long getPendingDeletionBlocks() { 5498 return blockManager.getPendingDeletionBlocksCount(); 5499 } 5500 5501 @Metric 5502 public long getExcessBlocks() { 5503 return blockManager.getExcessBlocksCount(); 5504 } 5505 5506 // HA-only metric 5507 @Metric 5508 public long getPostponedMisreplicatedBlocks() { 5509 return blockManager.getPostponedMisreplicatedBlocksCount(); 5510 } 5511 5512 // HA-only metric 5513 @Metric 5514 public int getPendingDataNodeMessageCount() { 5515 return blockManager.getPendingDataNodeMessageCount(); 5516 } 5517 5518 // HA-only metric 5519 @Metric 5520 public String getHAState() { 5521 return haContext.getState().toString(); 5522 } 5523 5524 // HA-only metric 5525 @Metric 5526 public long getMillisSinceLastLoadedEdits() { 5527 if (isInStandbyState() && editLogTailer != null) { 5528 return now() - editLogTailer.getLastLoadTimestamp(); 5529 } else { 5530 return 0; 5531 } 5532 } 5533 5534 @Metric 5535 public int getBlockCapacity() { 5536 return blockManager.getCapacity(); 5537 } 5538 5539 @Override // FSNamesystemMBean 5540 public String getFSState() { 5541 return isInSafeMode() ? "safeMode" : "Operational"; 5542 } 5543 5544 private ObjectName mbeanName; 5545 private ObjectName mxbeanName; 5546 5547 /** 5548 * Register the FSNamesystem MBean using the name 5549 * "hadoop:service=NameNode,name=FSNamesystemState" 5550 */ 5551 private void registerMBean() { 5552 // We can only implement one MXBean interface, so we keep the old one. 
5553 try { 5554 StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class); 5555 mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean); 5556 } catch (NotCompliantMBeanException e) { 5557 throw new RuntimeException("Bad MBean setup", e); 5558 } 5559 5560 LOG.info("Registered FSNamesystemState MBean"); 5561 } 5562 5563 /** 5564 * Shuts down the FSNamesystem. 5565 */ 5566 void shutdown() { 5567 if (mbeanName != null) { 5568 MBeans.unregister(mbeanName); 5569 mbeanName = null; 5570 } 5571 if (mxbeanName != null) { 5572 MBeans.unregister(mxbeanName); 5573 mxbeanName = null; 5574 } 5575 if (dir != null) { 5576 dir.shutdown(); 5577 } 5578 if (blockManager != null) { 5579 blockManager.shutdown(); 5580 } 5581 } 5582 5583 5584 @Override // FSNamesystemMBean 5585 public int getNumLiveDataNodes() { 5586 return getBlockManager().getDatanodeManager().getNumLiveDataNodes(); 5587 } 5588 5589 @Override // FSNamesystemMBean 5590 public int getNumDeadDataNodes() { 5591 return getBlockManager().getDatanodeManager().getNumDeadDataNodes(); 5592 } 5593 5594 @Override // FSNamesystemMBean 5595 public int getNumDecomLiveDataNodes() { 5596 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 5597 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); 5598 int liveDecommissioned = 0; 5599 for (DatanodeDescriptor node : live) { 5600 liveDecommissioned += node.isDecommissioned() ? 1 : 0; 5601 } 5602 return liveDecommissioned; 5603 } 5604 5605 @Override // FSNamesystemMBean 5606 public int getNumDecomDeadDataNodes() { 5607 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>(); 5608 getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true); 5609 int deadDecommissioned = 0; 5610 for (DatanodeDescriptor node : dead) { 5611 deadDecommissioned += node.isDecommissioned() ? 1 : 0; 5612 } 5613 return deadDecommissioned; 5614 } 5615 5616 @Override // FSNamesystemMBean 5617 public int getNumDecommissioningDataNodes() { 5618 return getBlockManager().getDatanodeManager().getDecommissioningNodes() 5619 .size(); 5620 } 5621 5622 @Override // FSNamesystemMBean 5623 @Metric({"StaleDataNodes", 5624 "Number of datanodes marked stale due to delayed heartbeat"}) 5625 public int getNumStaleDataNodes() { 5626 return getBlockManager().getDatanodeManager().getNumStaleNodes(); 5627 } 5628 5629 /** 5630 * Sets the current generation stamp for legacy blocks 5631 */ 5632 void setGenerationStampV1(long stamp) { 5633 generationStampV1.setCurrentValue(stamp); 5634 } 5635 5636 /** 5637 * Gets the current generation stamp for legacy blocks 5638 */ 5639 long getGenerationStampV1() { 5640 return generationStampV1.getCurrentValue(); 5641 } 5642 5643 /** 5644 * Sets the current generation stamp for this filesystem 5645 */ 5646 void setGenerationStampV2(long stamp) { 5647 generationStampV2.setCurrentValue(stamp); 5648 } 5649 5650 /** 5651 * Gets the current generation stamp for this filesystem 5652 */ 5653 long getGenerationStampV2() { 5654 return generationStampV2.getCurrentValue(); 5655 } 5656 5657 /** 5658 * Upgrades the generation stamp for the filesystem 5659 * by reserving a sufficient range for all existing blocks. 5660 * Should be invoked only during the first upgrade to 5661 * sequential block IDs.
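 * <p>
 * The V2 stamp is advanced to the current V1 value plus
 * {@code HdfsConstants.RESERVED_GENERATION_STAMPS_V1}, and that same value
 * becomes the V1 limit returned by {@link #getGenerationStampV1Limit()}.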
5662 */ 5663 long upgradeGenerationStampToV2() { 5664 Preconditions.checkState(generationStampV2.getCurrentValue() == 5665 GenerationStamp.LAST_RESERVED_STAMP); 5666 5667 generationStampV2.skipTo( 5668 generationStampV1.getCurrentValue() + 5669 HdfsConstants.RESERVED_GENERATION_STAMPS_V1); 5670 5671 generationStampV1Limit = generationStampV2.getCurrentValue(); 5672 return generationStampV2.getCurrentValue(); 5673 } 5674 5675 /** 5676 * Sets the generation stamp that delineates random and sequentially 5677 * allocated block IDs. 5678 * @param stamp 5679 */ 5680 void setGenerationStampV1Limit(long stamp) { 5681 Preconditions.checkState(generationStampV1Limit == 5682 GenerationStamp.GRANDFATHER_GENERATION_STAMP); 5683 generationStampV1Limit = stamp; 5684 } 5685 5686 /** 5687 * Gets the value of the generation stamp that delineates sequential 5688 * and random block IDs. 5689 */ 5690 long getGenerationStampAtblockIdSwitch() { 5691 return generationStampV1Limit; 5692 } 5693 5694 @VisibleForTesting 5695 SequentialBlockIdGenerator getBlockIdGenerator() { 5696 return blockIdGenerator; 5697 } 5698 5699 /** 5700 * Sets the maximum allocated block ID for this filesystem. This is 5701 * the basis for allocating new block IDs. 5702 */ 5703 void setLastAllocatedBlockId(long blockId) { 5704 blockIdGenerator.skipTo(blockId); 5705 } 5706 5707 /** 5708 * Gets the maximum sequentially allocated block ID for this filesystem 5709 */ 5710 long getLastAllocatedBlockId() { 5711 return blockIdGenerator.getCurrentValue(); 5712 } 5713 5714 /** 5715 * Increments, logs and then returns the stamp 5716 */ 5717 long nextGenerationStamp(boolean legacyBlock) 5718 throws IOException, SafeModeException { 5719 assert hasWriteLock(); 5720 checkNameNodeSafeMode("Cannot get next generation stamp"); 5721 5722 long gs; 5723 if (legacyBlock) { 5724 gs = getNextGenerationStampV1(); 5725 getEditLog().logGenerationStampV1(gs); 5726 } else { 5727 gs = getNextGenerationStampV2(); 5728 getEditLog().logGenerationStampV2(gs); 5729 } 5730 5731 // NB: callers sync the log 5732 return gs; 5733 } 5734 5735 @VisibleForTesting 5736 long getNextGenerationStampV1() throws IOException { 5737 long genStampV1 = generationStampV1.nextValue(); 5738 5739 if (genStampV1 >= generationStampV1Limit) { 5740 // We ran out of generation stamps for legacy blocks. In practice, it 5741 // is extremely unlikely as we reserved 1T v1 generation stamps. The 5742 // result is that we can no longer append to the legacy blocks that 5743 // were created before the upgrade to sequential block IDs. 5744 throw new OutOfV1GenerationStampsException(); 5745 } 5746 5747 return genStampV1; 5748 } 5749 5750 @VisibleForTesting 5751 long getNextGenerationStampV2() { 5752 return generationStampV2.nextValue(); 5753 } 5754 5755 long getGenerationStampV1Limit() { 5756 return generationStampV1Limit; 5757 } 5758 5759 /** 5760 * Determine whether the block ID was randomly generated (legacy) or 5761 * sequentially generated. The generation stamp value is used to 5762 * make the distinction. 5763 * @param block 5764 * @return true if the block ID was randomly generated, false otherwise. 
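 *         A block is considered legacy iff its generation stamp is strictly
 *         less than {@link #getGenerationStampV1Limit()}.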
5765 */ 5766 boolean isLegacyBlock(Block block) { 5767 return block.getGenerationStamp() < getGenerationStampV1Limit(); 5768 } 5769 5770 /** 5771 * Increments, logs and then returns the block ID 5772 */ 5773 private long nextBlockId() throws IOException { 5774 assert hasWriteLock(); 5775 checkNameNodeSafeMode("Cannot get next block ID"); 5776 final long blockId = blockIdGenerator.nextValue(); 5777 getEditLog().logAllocateBlockId(blockId); 5778 // NB: callers sync the log 5779 return blockId; 5780 } 5781 5782 private INodeFileUnderConstruction checkUCBlock(ExtendedBlock block, 5783 String clientName) throws IOException { 5784 assert hasWriteLock(); 5785 checkNameNodeSafeMode("Cannot get a new generation stamp and an " 5786 + "access token for block " + block); 5787 5788 // check stored block state 5789 BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block)); 5790 if (storedBlock == null || 5791 storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) { 5792 throw new IOException(block + 5793 " does not exist or is not under Construction" + storedBlock); 5794 } 5795 5796 // check file inode 5797 final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile(); 5798 if (file==null || !file.isUnderConstruction()) { 5799 throw new IOException("The file " + storedBlock + 5800 " belonged to does not exist or it is not under construction."); 5801 } 5802 5803 // check lease 5804 INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)file; 5805 if (clientName == null || !clientName.equals(pendingFile.getClientName())) { 5806 throw new LeaseExpiredException("Lease mismatch: " + block + 5807 " is accessed by a non lease holder " + clientName); 5808 } 5809 5810 return pendingFile; 5811 } 5812 5813 /** 5814 * Client is reporting some bad block locations. 5815 */ 5816 void reportBadBlocks(LocatedBlock[] blocks) throws IOException { 5817 checkOperation(OperationCategory.WRITE); 5818 NameNode.stateChangeLog.info("*DIR* reportBadBlocks"); 5819 writeLock(); 5820 try { 5821 checkOperation(OperationCategory.WRITE); 5822 for (int i = 0; i < blocks.length; i++) { 5823 ExtendedBlock blk = blocks[i].getBlock(); 5824 DatanodeInfo[] nodes = blocks[i].getLocations(); 5825 String[] storageIDs = blocks[i].getStorageIDs(); 5826 for (int j = 0; j < nodes.length; j++) { 5827 blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j], 5828 storageIDs == null ? null: storageIDs[j], 5829 "client machine reported it"); 5830 } 5831 } 5832 } finally { 5833 writeUnlock(); 5834 } 5835 } 5836 5837 /** 5838 * Get a new generation stamp together with an access token for 5839 * a block under construction 5840 * 5841 * This method is called for recovering a failed pipeline or setting up 5842 * a pipeline to append to a block. 
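 * <p>
 * A minimal client-side sketch of the recovery path that relies on this
 * call, issued through a {@code ClientProtocol} proxy named {@code namenode}
 * (variable names are illustrative only):
 * <pre>{@code
 * // ask the NameNode for a fresh generation stamp and access token
 * LocatedBlock lb = namenode.updateBlockForPipeline(blk, clientName);
 * // rebuild the pipeline with the surviving datanodes, then report it back
 * namenode.updatePipeline(clientName, blk, lb.getBlock(), newNodes, newStorageIDs);
 * }</pre>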
5843 * 5844 * @param block a block 5845 * @param clientName the name of a client 5846 * @return a located block with a new generation stamp and an access token 5847 * @throws IOException if any error occurs 5848 */ 5849 LocatedBlock updateBlockForPipeline(ExtendedBlock block, 5850 String clientName) throws IOException { 5851 LocatedBlock locatedBlock; 5852 checkOperation(OperationCategory.WRITE); 5853 writeLock(); 5854 try { 5855 checkOperation(OperationCategory.WRITE); 5856 5857 // check validity of parameters 5858 checkUCBlock(block, clientName); 5859 5860 // get a new generation stamp and an access token 5861 block.setGenerationStamp( 5862 nextGenerationStamp(isLegacyBlock(block.getLocalBlock()))); 5863 locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]); 5864 blockManager.setBlockToken(locatedBlock, AccessMode.WRITE); 5865 } finally { 5866 writeUnlock(); 5867 } 5868 // Ensure we record the new generation stamp 5869 getEditLog().logSync(); 5870 return locatedBlock; 5871 } 5872 5873 /** 5874 * Update a pipeline for a block under construction 5875 * 5876 * @param clientName the name of the client 5877 * @param oldBlock the old block 5878 * @param newBlock a new block with a new generation stamp and length 5879 * @param newNodes datanodes in the pipeline 5880 * @throws IOException if any error occurs 5881 */ 5882 void updatePipeline(String clientName, ExtendedBlock oldBlock, 5883 ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs) 5884 throws IOException { 5885 checkOperation(OperationCategory.WRITE); 5886 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 5887 if (cacheEntry != null && cacheEntry.isSuccess()) { 5888 return; // Return previous response 5889 } 5890 LOG.info("updatePipeline(block=" + oldBlock 5891 + ", newGenerationStamp=" + newBlock.getGenerationStamp() 5892 + ", newLength=" + newBlock.getNumBytes() 5893 + ", newNodes=" + Arrays.asList(newNodes) 5894 + ", clientName=" + clientName 5895 + ")"); 5896 writeLock(); 5897 boolean success = false; 5898 try { 5899 checkOperation(OperationCategory.WRITE); 5900 checkNameNodeSafeMode("Pipeline not updated"); 5901 assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and " 5902 + oldBlock + " have different block identifiers"; 5903 updatePipelineInternal(clientName, oldBlock, newBlock, newNodes, 5904 newStorageIDs, cacheEntry != null); 5905 success = true; 5906 } finally { 5907 writeUnlock(); 5908 RetryCache.setState(cacheEntry, success); 5909 } 5910 getEditLog().logSync(); 5911 LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock); 5912 } 5913 5914 /** @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[], String[]) */ 5915 private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 5916 ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs, 5917 boolean logRetryCache) 5918 throws IOException { 5919 assert hasWriteLock(); 5920 // check the validity of the block and lease holder name 5921 final INodeFileUnderConstruction pendingFile 5922 = checkUCBlock(oldBlock, clientName); 5923 final BlockInfoUnderConstruction blockinfo 5924 = (BlockInfoUnderConstruction)pendingFile.getLastBlock(); 5925 5926 // check new GS & length: this is not expected 5927 if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() || 5928 newBlock.getNumBytes() < blockinfo.getNumBytes()) { 5929 String msg = "Update " + oldBlock + " (len = " + 5930 blockinfo.getNumBytes() + ") to an older state: " + newBlock + 5931 " (len = "
newBlock.getNumBytes() +")"; 5932 LOG.warn(msg); 5933 throw new IOException(msg); 5934 } 5935 5936 // Update old block with the new generation stamp and new length 5937 blockinfo.setNumBytes(newBlock.getNumBytes()); 5938 blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp()); 5939 5940 // find the DatanodeDescriptor objects 5941 final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager() 5942 .getDatanodeStorageInfos(newNodes, newStorageIDs); 5943 blockinfo.setExpectedLocations(storages); 5944 5945 String src = leaseManager.findPath(pendingFile); 5946 dir.persistBlocks(src, pendingFile, logRetryCache); 5947 } 5948 5949 // rename was successful. If any part of the renamed subtree had 5950 // files that were being written to, update with new filename. 5951 void unprotectedChangeLease(String src, String dst) { 5952 assert hasWriteLock(); 5953 leaseManager.changeLease(src, dst); 5954 } 5955 5956 /** 5957 * Serializes leases. 5958 */ 5959 void saveFilesUnderConstruction(DataOutputStream out, 5960 Map<Long, INodeFileUnderConstruction> snapshotUCMap) throws IOException { 5961 // This is run by an inferior thread of saveNamespace, which holds a read 5962 // lock on our behalf. If we took the read lock here, we could block 5963 // for fairness if a writer is waiting on the lock. 5964 synchronized (leaseManager) { 5965 Map<String, INodeFileUnderConstruction> nodes = 5966 leaseManager.getINodesUnderConstruction(); 5967 for (Map.Entry<String, INodeFileUnderConstruction> entry 5968 : nodes.entrySet()) { 5969 // TODO: for HDFS-5428, because of rename operations, some 5970 // under-construction files that are 5971 // in the current fs directory can also be captured in the 5972 // snapshotUCMap. We should remove them from the snapshotUCMap. 5973 snapshotUCMap.remove(entry.getValue().getId()); 5974 } 5975 5976 out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size 5977 for (Map.Entry<String, INodeFileUnderConstruction> entry 5978 : nodes.entrySet()) { 5979 FSImageSerialization.writeINodeUnderConstruction( 5980 out, entry.getValue(), entry.getKey()); 5981 } 5982 for (Map.Entry<Long, INodeFileUnderConstruction> entry 5983 : snapshotUCMap.entrySet()) { 5984 // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>" 5985 // as their paths 5986 StringBuilder b = new StringBuilder(); 5987 b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX) 5988 .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING) 5989 .append(Path.SEPARATOR).append(entry.getValue().getId()); 5990 FSImageSerialization.writeINodeUnderConstruction( 5991 out, entry.getValue(), b.toString()); 5992 } 5993 } 5994 } 5995 5996 /** 5997 * Register a Backup name-node, verifying that it belongs 5998 * to the correct namespace, and adding it to the set of 5999 * active journals if necessary. 
6000 * 6001 * @param bnReg registration of the new BackupNode 6002 * @param nnReg registration of this NameNode 6003 * @throws IOException if the namespace IDs do not match 6004 */ 6005 void registerBackupNode(NamenodeRegistration bnReg, 6006 NamenodeRegistration nnReg) throws IOException { 6007 writeLock(); 6008 try { 6009 if(getFSImage().getStorage().getNamespaceID() 6010 != bnReg.getNamespaceID()) 6011 throw new IOException("Incompatible namespaceIDs: " 6012 + " Namenode namespaceID = " 6013 + getFSImage().getStorage().getNamespaceID() + "; " 6014 + bnReg.getRole() + 6015 " node namespaceID = " + bnReg.getNamespaceID()); 6016 if (bnReg.getRole() == NamenodeRole.BACKUP) { 6017 getFSImage().getEditLog().registerBackupNode( 6018 bnReg, nnReg); 6019 } 6020 } finally { 6021 writeUnlock(); 6022 } 6023 } 6024 6025 /** 6026 * Release (unregister) backup node. 6027 * <p> 6028 * Find and remove the backup stream corresponding to the node. 6029 * @param registration 6030 * @throws IOException 6031 */ 6032 void releaseBackupNode(NamenodeRegistration registration) 6033 throws IOException { 6034 checkOperation(OperationCategory.WRITE); 6035 writeLock(); 6036 try { 6037 checkOperation(OperationCategory.WRITE); 6038 if(getFSImage().getStorage().getNamespaceID() 6039 != registration.getNamespaceID()) 6040 throw new IOException("Incompatible namespaceIDs: " 6041 + " Namenode namespaceID = " 6042 + getFSImage().getStorage().getNamespaceID() + "; " 6043 + registration.getRole() + 6044 " node namespaceID = " + registration.getNamespaceID()); 6045 getEditLog().releaseBackupStream(registration); 6046 } finally { 6047 writeUnlock(); 6048 } 6049 } 6050 6051 static class CorruptFileBlockInfo { 6052 String path; 6053 Block block; 6054 6055 public CorruptFileBlockInfo(String p, Block b) { 6056 path = p; 6057 block = b; 6058 } 6059 6060 @Override 6061 public String toString() { 6062 return block.getBlockName() + "\t" + path; 6063 } 6064 } 6065 /** 6066 * @param path Restrict corrupt files to this portion of namespace. 
6067 * @param startBlockAfter Support for continuation; the set of files we return 6068 * back is ordered by blockid; startBlockAfter tells where to start from 6069 * @return a list in which each entry describes a corrupt file/block 6070 * @throws AccessControlException 6071 * @throws IOException 6072 */ 6073 Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path, 6074 String[] cookieTab) throws IOException { 6075 checkSuperuserPrivilege(); 6076 checkOperation(OperationCategory.READ); 6077 readLock(); 6078 try { 6079 checkOperation(OperationCategory.READ); 6080 if (!isPopulatingReplQueues()) { 6081 throw new IOException("Cannot run listCorruptFileBlocks because " + 6082 "replication queues have not been initialized."); 6083 } 6084 // print a limited # of corrupt files per call 6085 int count = 0; 6086 ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>(); 6087 6088 final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator(); 6089 6090 if (cookieTab == null) { 6091 cookieTab = new String[] { null }; 6092 } 6093 int skip = getIntCookie(cookieTab[0]); 6094 for (int i = 0; i < skip && blkIterator.hasNext(); i++) { 6095 blkIterator.next(); 6096 } 6097 6098 while (blkIterator.hasNext()) { 6099 Block blk = blkIterator.next(); 6100 final INode inode = (INode)blockManager.getBlockCollection(blk); 6101 skip++; 6102 if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) { 6103 String src = FSDirectory.getFullPathName(inode); 6104 if (src.startsWith(path)){ 6105 corruptFiles.add(new CorruptFileBlockInfo(src, blk)); 6106 count++; 6107 if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED) 6108 break; 6109 } 6110 } 6111 } 6112 cookieTab[0] = String.valueOf(skip); 6113 LOG.info("list corrupt file blocks returned: " + count); 6114 return corruptFiles; 6115 } finally { 6116 readUnlock(); 6117 } 6118 } 6119 6120 /** 6121 * Convert string cookie to integer. 6122 */ 6123 private static int getIntCookie(String cookie){ 6124 int c; 6125 if(cookie == null){ 6126 c = 0; 6127 } else { 6128 try{ 6129 c = Integer.parseInt(cookie); 6130 }catch (NumberFormatException e) { 6131 c = 0; 6132 } 6133 } 6134 c = Math.max(0, c); 6135 return c; 6136 } 6137 6138 /** 6139 * Create delegation token secret manager 6140 */ 6141 private DelegationTokenSecretManager createDelegationTokenSecretManager( 6142 Configuration conf) { 6143 return new DelegationTokenSecretManager(conf.getLong( 6144 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 6145 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT), 6146 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 6147 DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT), 6148 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 6149 DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT), 6150 DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL, 6151 conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 6152 DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT), 6153 this); 6154 } 6155 6156 /** 6157 * Returns the DelegationTokenSecretManager instance in the namesystem. 
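 * The manager is constructed by
 * {@code createDelegationTokenSecretManager(Configuration)} from the
 * delegation key update interval, token max lifetime and token renew
 * interval settings.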
6158 * @return delegation token secret manager object 6159 */ 6160 DelegationTokenSecretManager getDelegationTokenSecretManager() { 6161 return dtSecretManager; 6162 } 6163 6164 /** 6165 * @param renewer 6166 * @return Token<DelegationTokenIdentifier> 6167 * @throws IOException 6168 */ 6169 Token<DelegationTokenIdentifier> getDelegationToken(Text renewer) 6170 throws IOException { 6171 Token<DelegationTokenIdentifier> token; 6172 checkOperation(OperationCategory.WRITE); 6173 writeLock(); 6174 try { 6175 checkOperation(OperationCategory.WRITE); 6176 checkNameNodeSafeMode("Cannot issue delegation token"); 6177 if (!isAllowedDelegationTokenOp()) { 6178 throw new IOException( 6179 "Delegation Token can be issued only with kerberos or web authentication"); 6180 } 6181 if (dtSecretManager == null || !dtSecretManager.isRunning()) { 6182 LOG.warn("trying to get DT with no secret manager running"); 6183 return null; 6184 } 6185 6186 UserGroupInformation ugi = getRemoteUser(); 6187 String user = ugi.getUserName(); 6188 Text owner = new Text(user); 6189 Text realUser = null; 6190 if (ugi.getRealUser() != null) { 6191 realUser = new Text(ugi.getRealUser().getUserName()); 6192 } 6193 DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner, 6194 renewer, realUser); 6195 token = new Token<DelegationTokenIdentifier>( 6196 dtId, dtSecretManager); 6197 long expiryTime = dtSecretManager.getTokenExpiryTime(dtId); 6198 getEditLog().logGetDelegationToken(dtId, expiryTime); 6199 } finally { 6200 writeUnlock(); 6201 } 6202 getEditLog().logSync(); 6203 return token; 6204 } 6205 6206 /** 6207 * 6208 * @param token 6209 * @return New expiryTime of the token 6210 * @throws InvalidToken 6211 * @throws IOException 6212 */ 6213 long renewDelegationToken(Token<DelegationTokenIdentifier> token) 6214 throws InvalidToken, IOException { 6215 long expiryTime; 6216 checkOperation(OperationCategory.WRITE); 6217 writeLock(); 6218 try { 6219 checkOperation(OperationCategory.WRITE); 6220 6221 checkNameNodeSafeMode("Cannot renew delegation token"); 6222 if (!isAllowedDelegationTokenOp()) { 6223 throw new IOException( 6224 "Delegation Token can be renewed only with kerberos or web authentication"); 6225 } 6226 String renewer = getRemoteUser().getShortUserName(); 6227 expiryTime = dtSecretManager.renewToken(token, renewer); 6228 DelegationTokenIdentifier id = new DelegationTokenIdentifier(); 6229 ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); 6230 DataInputStream in = new DataInputStream(buf); 6231 id.readFields(in); 6232 getEditLog().logRenewDelegationToken(id, expiryTime); 6233 } finally { 6234 writeUnlock(); 6235 } 6236 getEditLog().logSync(); 6237 return expiryTime; 6238 } 6239 6240 /** 6241 * 6242 * @param token 6243 * @throws IOException 6244 */ 6245 void cancelDelegationToken(Token<DelegationTokenIdentifier> token) 6246 throws IOException { 6247 checkOperation(OperationCategory.WRITE); 6248 writeLock(); 6249 try { 6250 checkOperation(OperationCategory.WRITE); 6251 6252 checkNameNodeSafeMode("Cannot cancel delegation token"); 6253 String canceller = getRemoteUser().getUserName(); 6254 DelegationTokenIdentifier id = dtSecretManager 6255 .cancelToken(token, canceller); 6256 getEditLog().logCancelDelegationToken(id); 6257 } finally { 6258 writeUnlock(); 6259 } 6260 getEditLog().logSync(); 6261 } 6262 6263 /** 6264 * @param out save state of the secret manager 6265 * @param sdPath String storage directory path 6266 */ 6267 void saveSecretManagerState(DataOutputStream out, String sdPath) 
6268 throws IOException { 6269 dtSecretManager.saveSecretManagerState(out, sdPath); 6270 } 6271 6272 /** 6273 * @param in load the state of secret manager from input stream 6274 */ 6275 void loadSecretManagerState(DataInput in) throws IOException { 6276 dtSecretManager.loadSecretManagerState(in); 6277 } 6278 6279 /** 6280 * Log the updateMasterKey operation to edit logs 6281 * 6282 * @param key new delegation key. 6283 */ 6284 public void logUpdateMasterKey(DelegationKey key) { 6285 6286 assert !isInSafeMode() : 6287 "this should never be called while in safemode, since we stop " + 6288 "the DT manager before entering safemode!"; 6289 // No need to hold FSN lock since we don't access any internal 6290 // structures, and this is stopped before the FSN shuts itself 6291 // down, etc. 6292 getEditLog().logUpdateMasterKey(key); 6293 getEditLog().logSync(); 6294 } 6295 6296 /** 6297 * Log the cancellation of expired tokens to edit logs 6298 * 6299 * @param id token identifier to cancel 6300 */ 6301 public void logExpireDelegationToken(DelegationTokenIdentifier id) { 6302 assert !isInSafeMode() : 6303 "this should never be called while in safemode, since we stop " + 6304 "the DT manager before entering safemode!"; 6305 // No need to hold FSN lock since we don't access any internal 6306 // structures, and this is stopped before the FSN shuts itself 6307 // down, etc. 6308 getEditLog().logCancelDelegationToken(id); 6309 } 6310 6311 private void logReassignLease(String leaseHolder, String src, 6312 String newHolder) { 6313 assert hasWriteLock(); 6314 getEditLog().logReassignLease(leaseHolder, src, newHolder); 6315 } 6316 6317 /** 6318 * 6319 * @return true if delegation token operation is allowed 6320 */ 6321 private boolean isAllowedDelegationTokenOp() throws IOException { 6322 AuthenticationMethod authMethod = getConnectionAuthenticationMethod(); 6323 if (UserGroupInformation.isSecurityEnabled() 6324 && (authMethod != AuthenticationMethod.KERBEROS) 6325 && (authMethod != AuthenticationMethod.KERBEROS_SSL) 6326 && (authMethod != AuthenticationMethod.CERTIFICATE)) { 6327 return false; 6328 } 6329 return true; 6330 } 6331 6332 /** 6333 * Returns authentication method used to establish the connection 6334 * @return AuthenticationMethod used to establish connection 6335 * @throws IOException 6336 */ 6337 private AuthenticationMethod getConnectionAuthenticationMethod() 6338 throws IOException { 6339 UserGroupInformation ugi = getRemoteUser(); 6340 AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); 6341 if (authMethod == AuthenticationMethod.PROXY) { 6342 authMethod = ugi.getRealUser().getAuthenticationMethod(); 6343 } 6344 return authMethod; 6345 } 6346 6347 /** 6348 * Client invoked methods are invoked over RPC and will be in 6349 * RPC call context even if the client exits. 
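 * This covers both RPC invocations and WebHDFS requests.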
6350 */ 6351 private boolean isExternalInvocation() { 6352 return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation(); 6353 } 6354 6355 private static InetAddress getRemoteIp() { 6356 InetAddress ip = Server.getRemoteIp(); 6357 if (ip != null) { 6358 return ip; 6359 } 6360 return NamenodeWebHdfsMethods.getRemoteIp(); 6361 } 6362 6363 // optimize ugi lookup for RPC operations to avoid a trip through 6364 // UGI.getCurrentUser which is synch'ed 6365 private static UserGroupInformation getRemoteUser() throws IOException { 6366 return NameNode.getRemoteUser(); 6367 } 6368 6369 /** 6370 * Log fsck event in the audit log 6371 */ 6372 void logFsckEvent(String src, InetAddress remoteAddress) throws IOException { 6373 if (isAuditEnabled()) { 6374 logAuditEvent(true, getRemoteUser(), 6375 remoteAddress, 6376 "fsck", src, null, null); 6377 } 6378 } 6379 /** 6380 * Register NameNodeMXBean 6381 */ 6382 private void registerMXBean() { 6383 mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this); 6384 } 6385 6386 /** 6387 * Class representing Namenode information for JMX interfaces 6388 */ 6389 @Override // NameNodeMXBean 6390 public String getVersion() { 6391 return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision(); 6392 } 6393 6394 @Override // NameNodeMXBean 6395 public long getUsed() { 6396 return this.getCapacityUsed(); 6397 } 6398 6399 @Override // NameNodeMXBean 6400 public long getFree() { 6401 return this.getCapacityRemaining(); 6402 } 6403 6404 @Override // NameNodeMXBean 6405 public long getTotal() { 6406 return this.getCapacityTotal(); 6407 } 6408 6409 @Override // NameNodeMXBean 6410 public String getSafemode() { 6411 if (!this.isInSafeMode()) 6412 return ""; 6413 return "Safe mode is ON. " + this.getSafeModeTip(); 6414 } 6415 6416 @Override // NameNodeMXBean 6417 public boolean isUpgradeFinalized() { 6418 return this.getFSImage().isUpgradeFinalized(); 6419 } 6420 6421 @Override // NameNodeMXBean 6422 public long getNonDfsUsedSpace() { 6423 return datanodeStatistics.getCapacityUsedNonDFS(); 6424 } 6425 6426 @Override // NameNodeMXBean 6427 public float getPercentUsed() { 6428 return datanodeStatistics.getCapacityUsedPercent(); 6429 } 6430 6431 @Override // NameNodeMXBean 6432 public long getBlockPoolUsedSpace() { 6433 return datanodeStatistics.getBlockPoolUsed(); 6434 } 6435 6436 @Override // NameNodeMXBean 6437 public float getPercentBlockPoolUsed() { 6438 return datanodeStatistics.getPercentBlockPoolUsed(); 6439 } 6440 6441 @Override // NameNodeMXBean 6442 public float getPercentRemaining() { 6443 return datanodeStatistics.getCapacityRemainingPercent(); 6444 } 6445 6446 @Override // NameNodeMXBean 6447 public long getCacheCapacity() { 6448 return datanodeStatistics.getCacheCapacity(); 6449 } 6450 6451 @Override // NameNodeMXBean 6452 public long getCacheUsed() { 6453 return datanodeStatistics.getCacheUsed(); 6454 } 6455 6456 @Override // NameNodeMXBean 6457 public long getTotalBlocks() { 6458 return getBlocksTotal(); 6459 } 6460 6461 @Override // NameNodeMXBean 6462 @Metric 6463 public long getTotalFiles() { 6464 return getFilesTotal(); 6465 } 6466 6467 @Override // NameNodeMXBean 6468 public long getNumberOfMissingBlocks() { 6469 return getMissingBlocksCount(); 6470 } 6471 6472 @Override // NameNodeMXBean 6473 public int getThreads() { 6474 return ManagementFactory.getThreadMXBean().getThreadCount(); 6475 } 6476 6477 /** 6478 * Returned information is a JSON representation of map with host name as the 6479 * key and value is a map of live node 
attribute keys to their values.
   */
  @Override // NameNodeMXBean
  public String getLiveNodes() {
    final Map<String, Map<String,Object>> info = 
        new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
    for (DatanodeDescriptor node : live) {
      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
          .put("infoAddr", node.getInfoAddr())
          .put("infoSecureAddr", node.getInfoSecureAddr())
          .put("xferaddr", node.getXferAddr())
          .put("lastContact", getLastContact(node))
          .put("usedSpace", getDfsUsed(node))
          .put("adminState", node.getAdminState().toString())
          .put("nonDfsUsedSpace", node.getNonDfsUsed())
          .put("capacity", node.getCapacity())
          .put("numBlocks", node.numBlocks())
          .put("version", node.getSoftwareVersion())
          .put("used", node.getDfsUsed())
          .put("remaining", node.getRemaining())
          .put("blockScheduled", node.getBlocksScheduled())
          .put("blockPoolUsed", node.getBlockPoolUsed())
          .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
          .put("volfails", node.getVolumeFailures())
          .build();

      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of a map with host name as
   * the key; the value is a map of dead node attribute keys to their values.
   */
  @Override // NameNodeMXBean
  public String getDeadNodes() {
    final Map<String, Map<String, Object>> info = 
        new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
    for (DatanodeDescriptor node : dead) {
      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
          .put("lastContact", getLastContact(node))
          .put("decommissioned", node.isDecommissioned())
          .put("xferaddr", node.getXferAddr())
          .build();
      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of a map with host name as
   * the key; the value is a map of decommissioning node attribute keys to
   * their values.
   */
  @Override // NameNodeMXBean
  public String getDecomNodes() {
    final Map<String, Map<String, Object>> info = 
        new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> decomNodeList = blockManager
        .getDatanodeManager().getDecommissioningNodes();
    for (DatanodeDescriptor node : decomNodeList) {
      Map<String, Object> innerinfo = ImmutableMap
          .<String, Object> builder()
          .put("xferaddr", node.getXferAddr())
          .put("underReplicatedBlocks",
              node.decommissioningStatus.getUnderReplicatedBlocks())
          .put("decommissionOnlyReplicas",
              node.decommissioningStatus.getDecommissionOnlyReplicas())
          .put("underReplicateInOpenFiles",
              node.decommissioningStatus.getUnderReplicatedInOpenFiles())
          .build();
      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  private long getLastContact(DatanodeDescriptor alivenode) {
    return (Time.now() - alivenode.getLastUpdate())/1000;
  }

  private long getDfsUsed(DatanodeDescriptor alivenode) {
    return alivenode.getDfsUsed();
  }

  @Override //
NameNodeMXBean 6568 public String getClusterId() { 6569 return dir.fsImage.getStorage().getClusterID(); 6570 } 6571 6572 @Override // NameNodeMXBean 6573 public String getBlockPoolId() { 6574 return blockPoolId; 6575 } 6576 6577 @Override // NameNodeMXBean 6578 public String getNameDirStatuses() { 6579 Map<String, Map<File, StorageDirType>> statusMap = 6580 new HashMap<String, Map<File, StorageDirType>>(); 6581 6582 Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>(); 6583 for (Iterator<StorageDirectory> it 6584 = getFSImage().getStorage().dirIterator(); it.hasNext();) { 6585 StorageDirectory st = it.next(); 6586 activeDirs.put(st.getRoot(), st.getStorageDirType()); 6587 } 6588 statusMap.put("active", activeDirs); 6589 6590 List<Storage.StorageDirectory> removedStorageDirs 6591 = getFSImage().getStorage().getRemovedStorageDirs(); 6592 Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>(); 6593 for (StorageDirectory st : removedStorageDirs) { 6594 failedDirs.put(st.getRoot(), st.getStorageDirType()); 6595 } 6596 statusMap.put("failed", failedDirs); 6597 6598 return JSON.toString(statusMap); 6599 } 6600 6601 @Override // NameNodeMXBean 6602 public String getNodeUsage() { 6603 float median = 0; 6604 float max = 0; 6605 float min = 0; 6606 float dev = 0; 6607 6608 final Map<String, Map<String,Object>> info = 6609 new HashMap<String, Map<String,Object>>(); 6610 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6611 blockManager.getDatanodeManager().fetchDatanodes(live, null, true); 6612 6613 if (live.size() > 0) { 6614 float totalDfsUsed = 0; 6615 float[] usages = new float[live.size()]; 6616 int i = 0; 6617 for (DatanodeDescriptor dn : live) { 6618 usages[i++] = dn.getDfsUsedPercent(); 6619 totalDfsUsed += dn.getDfsUsedPercent(); 6620 } 6621 totalDfsUsed /= live.size(); 6622 Arrays.sort(usages); 6623 median = usages[usages.length / 2]; 6624 max = usages[usages.length - 1]; 6625 min = usages[0]; 6626 6627 for (i = 0; i < usages.length; i++) { 6628 dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed); 6629 } 6630 dev = (float) Math.sqrt(dev / usages.length); 6631 } 6632 6633 final Map<String, Object> innerInfo = new HashMap<String, Object>(); 6634 innerInfo.put("min", StringUtils.format("%.2f%%", min)); 6635 innerInfo.put("median", StringUtils.format("%.2f%%", median)); 6636 innerInfo.put("max", StringUtils.format("%.2f%%", max)); 6637 innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev)); 6638 info.put("nodeUsage", innerInfo); 6639 6640 return JSON.toString(info); 6641 } 6642 6643 @Override // NameNodeMXBean 6644 public String getNameJournalStatus() { 6645 List<Map<String, String>> jasList = new ArrayList<Map<String, String>>(); 6646 FSEditLog log = getFSImage().getEditLog(); 6647 if (log != null) { 6648 boolean openForWrite = log.isOpenForWrite(); 6649 for (JournalAndStream jas : log.getJournals()) { 6650 final Map<String, String> jasMap = new HashMap<String, String>(); 6651 String manager = jas.getManager().toString(); 6652 6653 jasMap.put("required", String.valueOf(jas.isRequired())); 6654 jasMap.put("disabled", String.valueOf(jas.isDisabled())); 6655 jasMap.put("manager", manager); 6656 6657 if (jas.isDisabled()) { 6658 jasMap.put("stream", "Failed"); 6659 } else if (openForWrite) { 6660 EditLogOutputStream elos = jas.getCurrentStream(); 6661 if (elos != null) { 6662 jasMap.put("stream", elos.generateReport()); 6663 } else { 6664 jasMap.put("stream", "not currently writing"); 6665 } 6666 } else { 6667 
jasMap.put("stream", "open for read"); 6668 } 6669 jasList.add(jasMap); 6670 } 6671 } 6672 return JSON.toString(jasList); 6673 } 6674 6675 @Override // NameNodeMxBean 6676 public String getJournalTransactionInfo() { 6677 Map<String, String> txnIdMap = new HashMap<String, String>(); 6678 txnIdMap.put("LastAppliedOrWrittenTxId", 6679 Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId())); 6680 txnIdMap.put("MostRecentCheckpointTxId", 6681 Long.toString(this.getFSImage().getMostRecentCheckpointTxId())); 6682 return JSON.toString(txnIdMap); 6683 } 6684 6685 @Override // NameNodeMXBean 6686 public String getNNStarted() { 6687 return getStartTime().toString(); 6688 } 6689 6690 @Override // NameNodeMXBean 6691 public String getCompileInfo() { 6692 return VersionInfo.getDate() + " by " + VersionInfo.getUser() + 6693 " from " + VersionInfo.getBranch(); 6694 } 6695 6696 /** @return the block manager. */ 6697 public BlockManager getBlockManager() { 6698 return blockManager; 6699 } 6700 /** @return the FSDirectory. */ 6701 public FSDirectory getFSDirectory() { 6702 return dir; 6703 } 6704 /** @return the cache manager. */ 6705 public CacheManager getCacheManager() { 6706 return cacheManager; 6707 } 6708 6709 @Override // NameNodeMXBean 6710 public String getCorruptFiles() { 6711 List<String> list = new ArrayList<String>(); 6712 Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks; 6713 try { 6714 corruptFileBlocks = listCorruptFileBlocks("/", null); 6715 int corruptFileCount = corruptFileBlocks.size(); 6716 if (corruptFileCount != 0) { 6717 for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) { 6718 list.add(c.toString()); 6719 } 6720 } 6721 } catch (IOException e) { 6722 LOG.warn("Get corrupt file blocks returned error: " + e.getMessage()); 6723 } 6724 return JSON.toString(list); 6725 } 6726 6727 @Override //NameNodeMXBean 6728 public int getDistinctVersionCount() { 6729 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions() 6730 .size(); 6731 } 6732 6733 @Override //NameNodeMXBean 6734 public Map<String, Integer> getDistinctVersions() { 6735 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions(); 6736 } 6737 6738 @Override //NameNodeMXBean 6739 public String getSoftwareVersion() { 6740 return VersionInfo.getVersion(); 6741 } 6742 6743 /** 6744 * Verifies that the given identifier and password are valid and match. 6745 * @param identifier Token identifier. 6746 * @param password Password in the token. 
   */
  public synchronized void verifyToken(DelegationTokenIdentifier identifier,
      byte[] password) throws InvalidToken, RetriableException {
    try {
      getDelegationTokenSecretManager().verifyToken(identifier, password);
    } catch (InvalidToken it) {
      if (inTransitionToActive()) {
        throw new RetriableException(it);
      }
      throw it;
    }
  }

  @Override
  public boolean isGenStampInFuture(Block block) {
    if (isLegacyBlock(block)) {
      return block.getGenerationStamp() > getGenerationStampV1();
    } else {
      return block.getGenerationStamp() > getGenerationStampV2();
    }
  }

  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }

  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }

  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }

  @VisibleForTesting
  ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }

  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }

  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }

  @Override
  public boolean isAvoidingStaleDataNodesForWrite() {
    return this.blockManager.getDatanodeManager()
        .shouldAvoidStaleDataNodesForWrite();
  }

  @Override // FSClusterStats
  public int getNumDatanodesInService() {
    return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
  }

  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }

  /** Allow snapshot on a directory. */
  void allowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.setSnapshottable(path, true);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logAllowSnapshot(path);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "allowSnapshot", path, null, null);
    }
  }

  /**
   * Disallow snapshot on a directory.
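   * A directory that still has snapshots is expected to be rejected by
   * {@link SnapshotManager#resetSnapshottable}; every snapshot under it must
   * be deleted before the directory can be made non-snapshottable again.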
*/ 6841 void disallowSnapshot(String path) throws SafeModeException, IOException { 6842 checkOperation(OperationCategory.WRITE); 6843 writeLock(); 6844 try { 6845 checkOperation(OperationCategory.WRITE); 6846 checkNameNodeSafeMode("Cannot disallow snapshot for " + path); 6847 checkSuperuserPrivilege(); 6848 6849 dir.writeLock(); 6850 try { 6851 snapshotManager.resetSnapshottable(path); 6852 } finally { 6853 dir.writeUnlock(); 6854 } 6855 getEditLog().logDisallowSnapshot(path); 6856 } finally { 6857 writeUnlock(); 6858 } 6859 getEditLog().logSync(); 6860 6861 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 6862 logAuditEvent(true, "disallowSnapshot", path, null, null); 6863 } 6864 } 6865 6866 /** 6867 * Create a snapshot 6868 * @param snapshotRoot The directory path where the snapshot is taken 6869 * @param snapshotName The name of the snapshot 6870 */ 6871 String createSnapshot(String snapshotRoot, String snapshotName) 6872 throws SafeModeException, IOException { 6873 checkOperation(OperationCategory.WRITE); 6874 final FSPermissionChecker pc = getPermissionChecker(); 6875 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, 6876 null); 6877 if (cacheEntry != null && cacheEntry.isSuccess()) { 6878 return (String) cacheEntry.getPayload(); 6879 } 6880 writeLock(); 6881 String snapshotPath = null; 6882 try { 6883 checkOperation(OperationCategory.WRITE); 6884 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot); 6885 if (isPermissionEnabled) { 6886 checkOwner(pc, snapshotRoot); 6887 } 6888 6889 if (snapshotName == null || snapshotName.isEmpty()) { 6890 snapshotName = Snapshot.generateDefaultSnapshotName(); 6891 } 6892 dir.verifySnapshotName(snapshotName, snapshotRoot); 6893 dir.writeLock(); 6894 try { 6895 snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName); 6896 } finally { 6897 dir.writeUnlock(); 6898 } 6899 getEditLog().logCreateSnapshot(snapshotRoot, snapshotName, 6900 cacheEntry != null); 6901 } finally { 6902 writeUnlock(); 6903 RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath); 6904 } 6905 getEditLog().logSync(); 6906 6907 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 6908 logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null); 6909 } 6910 return snapshotPath; 6911 } 6912 6913 /** 6914 * Rename a snapshot 6915 * @param path The directory path where the snapshot was taken 6916 * @param snapshotOldName Old snapshot name 6917 * @param snapshotNewName New snapshot name 6918 * @throws SafeModeException 6919 * @throws IOException 6920 */ 6921 void renameSnapshot(String path, String snapshotOldName, 6922 String snapshotNewName) throws SafeModeException, IOException { 6923 checkOperation(OperationCategory.WRITE); 6924 final FSPermissionChecker pc = getPermissionChecker(); 6925 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 6926 if (cacheEntry != null && cacheEntry.isSuccess()) { 6927 return; // Return previous response 6928 } 6929 writeLock(); 6930 boolean success = false; 6931 try { 6932 checkOperation(OperationCategory.WRITE); 6933 checkNameNodeSafeMode("Cannot rename snapshot for " + path); 6934 if (isPermissionEnabled) { 6935 checkOwner(pc, path); 6936 } 6937 dir.verifySnapshotName(snapshotNewName, path); 6938 6939 snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName); 6940 getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName, 6941 cacheEntry != null); 6942 success = true; 6943 } finally { 6944 writeUnlock(); 6945 
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
      String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
      logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
    }
  }

  /**
   * Get the list of snapshottable directories that are owned 
   * by the current user. Return all the snapshottable directories if the 
   * current user is a super user.
   * @return The list of all the current snapshottable directories
   * @throws IOException
   */
  public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
      throws IOException {
    SnapshottableDirectoryStatus[] status = null;
    checkOperation(OperationCategory.READ);
    final FSPermissionChecker checker = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      final String user = checker.isSuperUser()? null : checker.getUser();
      status = snapshotManager.getSnapshottableDirListing(user);
    } finally {
      readUnlock();
    }
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
    }
    return status;
  }

  /**
   * Get the difference between two snapshots (or between a snapshot and the
   * current status) of a snapshottable directory.
   *
   * @param path The full path of the snapshottable directory.
   * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
   *          or empty string indicates the current tree.
   * @param toSnapshot Name of the snapshot to calculate the diff to. Null or
   *          empty string indicates the current tree.
   * @return A report about the difference between {@code fromSnapshot} and
   *         {@code toSnapshot}. Modified/deleted/created/renamed files and
   *         directories belonging to the snapshottable directories are listed
   *         and labeled as M/-/+/R respectively.
   * @throws IOException
   */
  SnapshotDiffReport getSnapshotDiffReport(String path,
      String fromSnapshot, String toSnapshot) throws IOException {
    SnapshotDiffInfo diffs = null;
    checkOperation(OperationCategory.READ);
    final FSPermissionChecker pc = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (isPermissionEnabled) {
        checkSubtreeReadPermission(pc, path, fromSnapshot);
        checkSubtreeReadPermission(pc, path, toSnapshot);
      }
      diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
    } finally {
      readUnlock();
    }

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "computeSnapshotDiff", null, null, null);
    }
    return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
        path, fromSnapshot, toSnapshot,
        Collections.<DiffReportEntry> emptyList());
  }

  private void checkSubtreeReadPermission(final FSPermissionChecker pc,
      final String snapshottablePath, final String snapshot)
      throws AccessControlException, UnresolvedLinkException {
    final String fromPath = snapshot == null?
7026 snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot); 7027 checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ); 7028 } 7029 7030 /** 7031 * Delete a snapshot of a snapshottable directory 7032 * @param snapshotRoot The snapshottable directory 7033 * @param snapshotName The name of the to-be-deleted snapshot 7034 * @throws SafeModeException 7035 * @throws IOException 7036 */ 7037 void deleteSnapshot(String snapshotRoot, String snapshotName) 7038 throws SafeModeException, IOException { 7039 checkOperation(OperationCategory.WRITE); 7040 final FSPermissionChecker pc = getPermissionChecker(); 7041 7042 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7043 if (cacheEntry != null && cacheEntry.isSuccess()) { 7044 return; // Return previous response 7045 } 7046 boolean success = false; 7047 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); 7048 writeLock(); 7049 try { 7050 checkOperation(OperationCategory.WRITE); 7051 checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot); 7052 if (isPermissionEnabled) { 7053 checkOwner(pc, snapshotRoot); 7054 } 7055 7056 List<INode> removedINodes = new ChunkedArrayList<INode>(); 7057 dir.writeLock(); 7058 try { 7059 snapshotManager.deleteSnapshot(snapshotRoot, snapshotName, 7060 collectedBlocks, removedINodes); 7061 dir.removeFromInodeMap(removedINodes); 7062 } finally { 7063 dir.writeUnlock(); 7064 } 7065 removedINodes.clear(); 7066 getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName, 7067 cacheEntry != null); 7068 success = true; 7069 } finally { 7070 writeUnlock(); 7071 RetryCache.setState(cacheEntry, success); 7072 } 7073 getEditLog().logSync(); 7074 7075 removeBlocks(collectedBlocks); 7076 collectedBlocks.clear(); 7077 7078 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 7079 String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName); 7080 logAuditEvent(true, "deleteSnapshot", rootPath, null, null); 7081 } 7082 } 7083 7084 /** 7085 * Remove a list of INodeDirectorySnapshottable from the SnapshotManager 7086 * @param toRemove the list of INodeDirectorySnapshottable to be removed 7087 */ 7088 void removeSnapshottableDirs(List<INodeDirectorySnapshottable> toRemove) { 7089 if (snapshotManager != null) { 7090 snapshotManager.removeSnapshottable(toRemove); 7091 } 7092 } 7093 7094 long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags) 7095 throws IOException { 7096 checkOperation(OperationCategory.WRITE); 7097 final FSPermissionChecker pc = isPermissionEnabled ? 
      getPermissionChecker() : null;
    CacheEntryWithPayload cacheEntry =
        RetryCache.waitForCompletion(retryCache, null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (Long) cacheEntry.getPayload();
    }
    boolean success = false;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    Long result = null;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      if (directive.getId() != null) {
        throw new IOException("addDirective: you cannot specify an ID " +
            "for this operation.");
      }
      CacheDirectiveInfo effectiveDirective = 
          cacheManager.addDirective(directive, pc, flags);
      getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
          cacheEntry != null);
      result = effectiveDirective.getId();
      success = true;
    } finally {
      writeUnlock();
      if (success) {
        getEditLog().logSync();
      }
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCacheDirective", null, null, null);
      }
      RetryCache.setState(cacheEntry, success, result);
    }
    return result;
  }

  void modifyCacheDirective(CacheDirectiveInfo directive,
      EnumSet<CacheFlag> flags) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    boolean success = false;
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return;
    }
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot modify cache directive", safeMode);
      }
      cacheManager.modifyDirective(directive, pc, flags);
      getEditLog().logModifyCacheDirectiveInfo(directive,
          cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (success) {
        getEditLog().logSync();
      }
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "modifyCacheDirective", null, null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }
  }

  void removeCacheDirective(Long id) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
7178 getPermissionChecker() : null; 7179 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7180 if (cacheEntry != null && cacheEntry.isSuccess()) { 7181 return; 7182 } 7183 boolean success = false; 7184 writeLock(); 7185 try { 7186 checkOperation(OperationCategory.WRITE); 7187 if (isInSafeMode()) { 7188 throw new SafeModeException( 7189 "Cannot remove cache directives", safeMode); 7190 } 7191 cacheManager.removeDirective(id, pc); 7192 getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null); 7193 success = true; 7194 } finally { 7195 writeUnlock(); 7196 if (isAuditEnabled() && isExternalInvocation()) { 7197 logAuditEvent(success, "removeCacheDirective", null, null, 7198 null); 7199 } 7200 RetryCache.setState(cacheEntry, success); 7201 } 7202 getEditLog().logSync(); 7203 } 7204 7205 BatchedListEntries<CacheDirectiveEntry> listCacheDirectives( 7206 long startId, CacheDirectiveInfo filter) throws IOException { 7207 checkOperation(OperationCategory.READ); 7208 final FSPermissionChecker pc = isPermissionEnabled ? 7209 getPermissionChecker() : null; 7210 BatchedListEntries<CacheDirectiveEntry> results; 7211 cacheManager.waitForRescanIfNeeded(); 7212 readLock(); 7213 boolean success = false; 7214 try { 7215 checkOperation(OperationCategory.READ); 7216 results = 7217 cacheManager.listCacheDirectives(startId, filter, pc); 7218 success = true; 7219 } finally { 7220 readUnlock(); 7221 if (isAuditEnabled() && isExternalInvocation()) { 7222 logAuditEvent(success, "listCacheDirectives", null, null, 7223 null); 7224 } 7225 } 7226 return results; 7227 } 7228 7229 public void addCachePool(CachePoolInfo req) throws IOException { 7230 checkOperation(OperationCategory.WRITE); 7231 final FSPermissionChecker pc = isPermissionEnabled ? 7232 getPermissionChecker() : null; 7233 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7234 if (cacheEntry != null && cacheEntry.isSuccess()) { 7235 return; // Return previous response 7236 } 7237 writeLock(); 7238 boolean success = false; 7239 try { 7240 checkOperation(OperationCategory.WRITE); 7241 if (isInSafeMode()) { 7242 throw new SafeModeException( 7243 "Cannot add cache pool " + req.getPoolName(), safeMode); 7244 } 7245 if (pc != null) { 7246 pc.checkSuperuserPrivilege(); 7247 } 7248 CachePoolInfo info = cacheManager.addCachePool(req); 7249 getEditLog().logAddCachePool(info, cacheEntry != null); 7250 success = true; 7251 } finally { 7252 writeUnlock(); 7253 if (isAuditEnabled() && isExternalInvocation()) { 7254 logAuditEvent(success, "addCachePool", req.getPoolName(), null, null); 7255 } 7256 RetryCache.setState(cacheEntry, success); 7257 } 7258 7259 getEditLog().logSync(); 7260 } 7261 7262 public void modifyCachePool(CachePoolInfo req) throws IOException { 7263 checkOperation(OperationCategory.WRITE); 7264 final FSPermissionChecker pc = 7265 isPermissionEnabled ? 
getPermissionChecker() : null; 7266 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7267 if (cacheEntry != null && cacheEntry.isSuccess()) { 7268 return; // Return previous response 7269 } 7270 writeLock(); 7271 boolean success = false; 7272 try { 7273 checkOperation(OperationCategory.WRITE); 7274 if (isInSafeMode()) { 7275 throw new SafeModeException( 7276 "Cannot modify cache pool " + req.getPoolName(), safeMode); 7277 } 7278 if (pc != null) { 7279 pc.checkSuperuserPrivilege(); 7280 } 7281 cacheManager.modifyCachePool(req); 7282 getEditLog().logModifyCachePool(req, cacheEntry != null); 7283 success = true; 7284 } finally { 7285 writeUnlock(); 7286 if (isAuditEnabled() && isExternalInvocation()) { 7287 logAuditEvent(success, "modifyCachePool", req.getPoolName(), null, null); 7288 } 7289 RetryCache.setState(cacheEntry, success); 7290 } 7291 7292 getEditLog().logSync(); 7293 } 7294 7295 public void removeCachePool(String cachePoolName) throws IOException { 7296 checkOperation(OperationCategory.WRITE); 7297 final FSPermissionChecker pc = 7298 isPermissionEnabled ? getPermissionChecker() : null; 7299 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7300 if (cacheEntry != null && cacheEntry.isSuccess()) { 7301 return; // Return previous response 7302 } 7303 writeLock(); 7304 boolean success = false; 7305 try { 7306 checkOperation(OperationCategory.WRITE); 7307 if (isInSafeMode()) { 7308 throw new SafeModeException( 7309 "Cannot remove cache pool " + cachePoolName, safeMode); 7310 } 7311 if (pc != null) { 7312 pc.checkSuperuserPrivilege(); 7313 } 7314 cacheManager.removeCachePool(cachePoolName); 7315 getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null); 7316 success = true; 7317 } finally { 7318 writeUnlock(); 7319 if (isAuditEnabled() && isExternalInvocation()) { 7320 logAuditEvent(success, "removeCachePool", cachePoolName, null, null); 7321 } 7322 RetryCache.setState(cacheEntry, success); 7323 } 7324 7325 getEditLog().logSync(); 7326 } 7327 7328 public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey) 7329 throws IOException { 7330 final FSPermissionChecker pc = 7331 isPermissionEnabled ? getPermissionChecker() : null; 7332 BatchedListEntries<CachePoolEntry> results; 7333 checkOperation(OperationCategory.READ); 7334 boolean success = false; 7335 cacheManager.waitForRescanIfNeeded(); 7336 readLock(); 7337 try { 7338 checkOperation(OperationCategory.READ); 7339 results = cacheManager.listCachePools(pc, prevKey); 7340 success = true; 7341 } finally { 7342 readUnlock(); 7343 if (isAuditEnabled() && isExternalInvocation()) { 7344 logAuditEvent(success, "listCachePools", null, null, null); 7345 } 7346 } 7347 return results; 7348 } 7349 7350 /** 7351 * Default AuditLogger implementation; used when no access logger is 7352 * defined in the config file. It can also be explicitly listed in the 7353 * config file. 
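   *
   * <p>For example, to keep the default logger while adding a custom one, an
   * hdfs-site.xml entry along the following lines is expected (the key is
   * {@code dfs.namenode.audit.loggers}; {@code org.example.MyAuditLogger} is
   * only a placeholder for a user-supplied {@code AuditLogger}):
   * <pre>
   * &lt;property&gt;
   *   &lt;name&gt;dfs.namenode.audit.loggers&lt;/name&gt;
   *   &lt;value&gt;default,org.example.MyAuditLogger&lt;/value&gt;
   * &lt;/property&gt;
   * </pre>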
7354 */ 7355 private static class DefaultAuditLogger extends HdfsAuditLogger { 7356 7357 private boolean logTokenTrackingId; 7358 7359 @Override 7360 public void initialize(Configuration conf) { 7361 logTokenTrackingId = conf.getBoolean( 7362 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 7363 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT); 7364 } 7365 7366 @Override 7367 public void logAuditEvent(boolean succeeded, String userName, 7368 InetAddress addr, String cmd, String src, String dst, 7369 FileStatus status, UserGroupInformation ugi, 7370 DelegationTokenSecretManager dtSecretManager) { 7371 if (auditLog.isInfoEnabled()) { 7372 final StringBuilder sb = auditBuffer.get(); 7373 sb.setLength(0); 7374 sb.append("allowed=").append(succeeded).append("\t"); 7375 sb.append("ugi=").append(userName).append("\t"); 7376 sb.append("ip=").append(addr).append("\t"); 7377 sb.append("cmd=").append(cmd).append("\t"); 7378 sb.append("src=").append(src).append("\t"); 7379 sb.append("dst=").append(dst).append("\t"); 7380 if (null == status) { 7381 sb.append("perm=null"); 7382 } else { 7383 sb.append("perm="); 7384 sb.append(status.getOwner()).append(":"); 7385 sb.append(status.getGroup()).append(":"); 7386 sb.append(status.getPermission()); 7387 } 7388 if (logTokenTrackingId) { 7389 sb.append("\t").append("trackingId="); 7390 String trackingId = null; 7391 if (ugi != null && dtSecretManager != null 7392 && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) { 7393 for (TokenIdentifier tid: ugi.getTokenIdentifiers()) { 7394 if (tid instanceof DelegationTokenIdentifier) { 7395 DelegationTokenIdentifier dtid = 7396 (DelegationTokenIdentifier)tid; 7397 trackingId = dtSecretManager.getTokenTrackingId(dtid); 7398 break; 7399 } 7400 } 7401 } 7402 sb.append(trackingId); 7403 } 7404 logAuditMessage(sb.toString()); 7405 } 7406 } 7407 7408 public void logAuditMessage(String message) { 7409 auditLog.info(message); 7410 } 7411 } 7412 7413 private static void enableAsyncAuditLog() { 7414 if (!(auditLog instanceof Log4JLogger)) { 7415 LOG.warn("Log4j is required to enable async auditlog"); 7416 return; 7417 } 7418 Logger logger = ((Log4JLogger)auditLog).getLogger(); 7419 @SuppressWarnings("unchecked") 7420 List<Appender> appenders = Collections.list(logger.getAllAppenders()); 7421 // failsafe against trying to async it more than once 7422 if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { 7423 AsyncAppender asyncAppender = new AsyncAppender(); 7424 // change logger to have an async appender containing all the 7425 // previously configured appenders 7426 for (Appender appender : appenders) { 7427 logger.removeAppender(appender); 7428 asyncAppender.addAppender(appender); 7429 } 7430 logger.addAppender(asyncAppender); 7431 } 7432 } 7433 } 7434
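/*
 * For reference, the delegation-token operations implemented above are
 * normally reached from a client roughly as in the following sketch (a
 * minimal, illustrative flow assuming a Kerberos-authenticated client and the
 * public FileSystem/Token APIs; "renewer" is a placeholder principal name):
 *
 *   Configuration conf = new Configuration();
 *   FileSystem fs = FileSystem.get(conf);
 *   // RPCs into FSNamesystem#getDelegationToken(Text)
 *   Token<?> token = fs.getDelegationToken("renewer");
 *   // RPCs into FSNamesystem#renewDelegationToken(Token)
 *   long expiryTime = token.renew(conf);
 *   // RPCs into FSNamesystem#cancelDelegationToken(Token)
 *   token.cancel(conf);
 */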