/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.util.Time.now;

import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream.LogHeaderCorruptException;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.util.Holder;

import com.google.common.base.Joiner;

@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec
  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;

  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
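  // A rough usage sketch (illustrative only, not part of this class): this loader is
  // constructed against the live FSNamesystem with the txid of the last transaction
  // already applied, then fed edit log streams in order, roughly the way
  // FSImage.loadEdits() drives it. Variable names below are hypothetical.
  //
  //   FSEditLogLoader loader = new FSEditLogLoader(namesystem, lastAppliedTxId);
  //   for (EditLogInputStream editStream : editStreams) {
  //     // Applies every op in the stream under the namesystem write lock and
  //     // returns the number of edits applied; null disables recovery mode.
  //     loader.loadFSEdits(editStream, loader.getLastAppliedTxId() + 1, null);
  //   }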
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      MetaRecoveryContext recovery) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(edits);
    prog.beginStep(Phase.LOADING_EDITS, step);
    fsNamesys.writeLock();
    try {
      long startTime = now();
      FSImage.LOG.info("Start loading edits file " + edits.getName());
      long numEdits = loadEditRecords(edits, false,
          expectedStartingTxId, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (now()-startTime)/1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock();
      prog.endStep(Phase.LOADING_EDITS, step);
    }
  }

  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
      long expectedStartingTxId, MetaRecoveryContext recovery)
      throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

    long recentOpcodeOffsets[] = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = now();
    long lastInodeId = fsNamesys.getLastInodeId();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(), e);
            String errorMessage =
                formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] =
              in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be a gap in the edit log. We expected txid " +
                  expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery, "ignoring missing " +
                  " transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be an out-of-order edit in the edit log. We " +
We " + 192 "expected txid " + expectedTxId + ", but got txid " + 193 op.getTransactionId() + ".", recovery, 194 "skipping the out-of-order edit"); 195 continue; 196 } 197 } 198 try { 199 long inodeId = applyEditLogOp(op, fsDir, in.getVersion(), lastInodeId); 200 if (lastInodeId < inodeId) { 201 lastInodeId = inodeId; 202 } 203 } catch (Throwable e) { 204 LOG.error("Encountered exception on operation " + op, e); 205 MetaRecoveryContext.editLogLoaderPrompt("Failed to " + 206 "apply edit log operation " + op + ": error " + 207 e.getMessage(), recovery, "applying edits"); 208 } 209 // Now that the operation has been successfully decoded and 210 // applied, update our bookkeeping. 211 incrOpCount(op.opCode, opCounts, step, counter); 212 if (op.hasTransactionId()) { 213 lastAppliedTxId = op.getTransactionId(); 214 expectedTxId = lastAppliedTxId + 1; 215 } else { 216 expectedTxId = lastAppliedTxId = expectedStartingTxId; 217 } 218 // log progress 219 if (op.hasTransactionId()) { 220 long now = now(); 221 if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) { 222 long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1; 223 int percent = Math.round((float) deltaTxId / numTxns * 100); 224 LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns 225 + " transactions completed. (" + percent + "%)"); 226 lastLogTime = now; 227 } 228 } 229 numEdits++; 230 } catch (MetaRecoveryContext.RequestStopException e) { 231 MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + 232 in.getPosition() + "/" + in.length()); 233 break; 234 } 235 } 236 } finally { 237 fsNamesys.resetLastInodeId(lastInodeId); 238 if(closeOnExit) { 239 in.close(); 240 } 241 fsDir.writeUnlock(); 242 fsNamesys.writeUnlock(); 243 244 if (LOG.isTraceEnabled()) { 245 LOG.trace("replaying edit log finished"); 246 } 247 248 if (FSImage.LOG.isDebugEnabled()) { 249 dumpOpCounts(opCounts); 250 } 251 } 252 return numEdits; 253 } 254 255 // allocate and update last allocated inode id 256 private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion, 257 long lastInodeId) throws IOException { 258 long inodeId = inodeIdFromOp; 259 260 if (inodeId == INodeId.GRANDFATHER_INODE_ID) { 261 if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) { 262 throw new IOException("The layout version " + logVersion 263 + " supports inodeId but gave bogus inodeId"); 264 } 265 inodeId = fsNamesys.allocateNewInodeId(); 266 } else { 267 // need to reset lastInodeId. fsnamesys gets lastInodeId firstly from 268 // fsimage but editlog captures more recent inodeId allocations 269 if (inodeId > lastInodeId) { 270 fsNamesys.resetLastInodeId(inodeId); 271 } 272 } 273 return inodeId; 274 } 275 276 @SuppressWarnings("deprecation") 277 private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, 278 int logVersion, long lastInodeId) throws IOException { 279 long inodeId = INodeId.GRANDFATHER_INODE_ID; 280 if (LOG.isTraceEnabled()) { 281 LOG.trace("replaying edit log: " + op); 282 } 283 final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds(); 284 285 switch (op.opCode) { 286 case OP_ADD: { 287 AddCloseOp addCloseOp = (AddCloseOp)op; 288 if (FSNamesystem.LOG.isDebugEnabled()) { 289 FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + 290 " numblocks : " + addCloseOp.blocks.length + 291 " clientHolder " + addCloseOp.clientName + 292 " clientMachine " + addCloseOp.clientMachine); 293 } 294 // There three cases here: 295 // 1. OP_ADD to create a new file 296 // 2. OP_ADD to update file blocks 297 // 3. 

      // See if the file already exists (persistBlocks call)
      final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path);
      final INodeFile oldFile = INodeFile.valueOf(
          iip.getINode(0), addCloseOp.path, true);
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager()
            .adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
            lastInodeId);
        newFile = fsDir.unprotectedAddFile(inodeId,
            addCloseOp.path, addCloseOp.permissions, replication,
            addCloseOp.mtime, addCloseOp.atime, addCloseOp.blockSize, true,
            addCloseOp.clientName, addCloseOp.clientMachine);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);

        // add the op into retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
              HdfsFileStatus.EMPTY_NAME, newFile, null);
          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
              addCloseOp.rpcCallId, stat);
        }
      } else { // This is OP_ADD on an existing file
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file " +
                "for append");
          }
          LocatedBlock lb = fsNamesys.prepareFileForWrite(addCloseOp.path,
              oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null,
              false, iip.getLatestSnapshot(), false);
          newFile = INodeFile.valueOf(fsDir.getINode(addCloseOp.path),
              addCloseOp.path, true);

          // add the op into retry cache if necessary
          if (toAddRetryCache) {
            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
                addCloseOp.rpcCallId, lb);
          }
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap());
      newFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap());
      updateBlocks(fsDir, addCloseOp, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp)op;

      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }

      final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path);
      final INodeFile oldFile = INodeFile.valueOf(iip.getINode(0), addCloseOp.path);

      // Update the salient file attributes.
      oldFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap());
      oldFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap());
      updateBlocks(fsDir, addCloseOp, oldFile);

      // Now close the file
      if (!oldFile.isUnderConstruction() &&
          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + addCloseOp.path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (oldFile.isUnderConstruction()) {
        INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile;
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path);
        INodeFile newFile = ucFile.toINodeFile(ucFile.getModificationTime());
        fsDir.unprotectedReplaceINodeFile(addCloseOp.path, ucFile, newFile);
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp)op;
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path +
            " numblocks : " + updateOp.blocks.length);
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(updateOp.path),
          updateOp.path);
      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, oldFile);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
      }
      break;
    }

    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp)op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      fsDir.unprotectedSetReplication(setReplicationOp.path,
          replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
      fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs,
          concatDeleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
            concatDeleteOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp)op;
      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
          renameOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp)op;
      fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
      }
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp)op;
      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedMkdir(inodeId, mkdirOp.path, mkdirOp.permissions,
          mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP_V1: {
      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op)op;
      fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
      fsDir.unprotectedSetPermission(setPermissionsOp.src,
          setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp)op;
      fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username,
          setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
      fsDir.unprotectedSetQuota(setNSQuotaOp.src,
          setNSQuotaOp.nsQuota,
          HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
      fsDir.unprotectedSetQuota(clearNSQuotaOp.src,
          HdfsConstants.QUOTA_RESET,
          HdfsConstants.QUOTA_DONT_SET);
      break;
    }

    case OP_SET_QUOTA:
      SetQuotaOp setQuotaOp = (SetQuotaOp)op;
      fsDir.unprotectedSetQuota(setQuotaOp.src,
          setQuotaOp.nsQuota,
          setQuotaOp.dsQuota);
      break;

    case OP_TIMES: {
      TimesOp timesOp = (TimesOp)op;

      fsDir.unprotectedSetTimes(timesOp.path,
          timesOp.mtime,
          timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      if (!FileSystem.isSymlinksEnabled()) {
        throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS");
      }
      SymlinkOp symlinkOp = (SymlinkOp)op;
      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedAddSymlink(inodeId, symlinkOp.path,
          symlinkOp.value, symlinkOp.mtime,
          symlinkOp.atime, symlinkOp.permissionStatus);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp)op;
      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
          renameOp.timestamp, renameOp.options);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_GET_DELEGATION_TOKEN: {
      GetDelegationTokenOp getDelegationTokenOp
          = (GetDelegationTokenOp)op;

      fsNamesys.getDelegationTokenSecretManager()
          .addPersistedDelegationToken(getDelegationTokenOp.token,
              getDelegationTokenOp.expiryTime);
      break;
    }
    case OP_RENEW_DELEGATION_TOKEN: {
      RenewDelegationTokenOp renewDelegationTokenOp
          = (RenewDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
              renewDelegationTokenOp.expiryTime);
      break;
    }
    case OP_CANCEL_DELEGATION_TOKEN: {
      CancelDelegationTokenOp cancelDelegationTokenOp
          = (CancelDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenCancellation(
              cancelDelegationTokenOp.token);
      break;
    }
    case OP_UPDATE_MASTER_KEY: {
      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedMasterKey(updateMasterKeyOp.key);
      break;
    }
    case OP_REASSIGN_LEASE: {
      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;

      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
      INodeFileUnderConstruction pendingFile =
          INodeFileUnderConstruction.valueOf(
              fsDir.getINode(reassignLeaseOp.path), reassignLeaseOp.path);
      fsNamesys.reassignLeaseInternal(lease,
          reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
      break;
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
      String path = fsNamesys.getSnapshotManager().createSnapshot(
          createSnapshotOp.snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
      }
      break;
    }
    case OP_DELETE_SNAPSHOT: {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ArrayList<INode>();
      fsNamesys.getSnapshotManager().deleteSnapshot(
          deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocks(collectedBlocks);
      collectedBlocks.clear();
      fsNamesys.dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
            deleteSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
      fsNamesys.getSnapshotManager().renameSnapshot(
          renameSnapshotOp.snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
            renameSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
      fsNamesys.getSnapshotManager().setSnapshottable(
          allowSnapshotOp.snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
      fsNamesys.getSnapshotManager().resetSnapshottable(
          disallowSnapshotOp.snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {
      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
      fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
      break;
    }
    case OP_ALLOCATE_BLOCK_ID: {
      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
      fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
    return inodeId;
  }

  private static String formatEditLogReplayError(EditLogInputStream in,
      long recentOpcodeOffsets[], long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset " + in.getPosition());
    sb.append(". Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();

    // Are we only updating the last block's gen stamp.
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
          oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(
            (INodeFileUnderConstruction)file,
            (BlockInfoUnderConstruction)oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path,
          (INodeFileUnderConstruction) file, oldBlock);
      if (!removed && !(op instanceof UpdateBlocksOp)) {
        throw new IOException("Trying to delete non-existent block " + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n ");
    Joiner.on("\n ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
      Counter counter) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
    counter.increment();
  }

  /**
   * Throw appropriate exception during upgrade from 203, when editlog loading
   * could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 version has conflicting opcodes with the later releases.
    // The editlog must be emptied by restarting the namenode, before proceeding
    // with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old "
          + " release and restart the namenode. This empties the editlog "
          + " and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it.
   *
   * @throws IOException if the stream cannot be read due to an IO error (eg
   *           if the log does not exist)
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length." +
+ 816 "Position was " + lastPos, t); 817 in.resync(); 818 FSImage.LOG.warn("After resync, position is " + in.getPosition()); 819 continue; 820 } 821 if (lastTxId == HdfsConstants.INVALID_TXID 822 || op.getTransactionId() > lastTxId) { 823 lastTxId = op.getTransactionId(); 824 } 825 numValid++; 826 } 827 return new EditLogValidation(lastPos, lastTxId, false); 828 } 829 830 static class EditLogValidation { 831 private final long validLength; 832 private final long endTxId; 833 private final boolean hasCorruptHeader; 834 835 EditLogValidation(long validLength, long endTxId, 836 boolean hasCorruptHeader) { 837 this.validLength = validLength; 838 this.endTxId = endTxId; 839 this.hasCorruptHeader = hasCorruptHeader; 840 } 841 842 long getValidLength() { return validLength; } 843 844 long getEndTxId() { return endTxId; } 845 846 boolean hasCorruptHeader() { return hasCorruptHeader; } 847 } 848 849 /** 850 * Stream wrapper that keeps track of the current stream position. 851 * 852 * This stream also allows us to set a limit on how many bytes we can read 853 * without getting an exception. 854 */ 855 public static class PositionTrackingInputStream extends FilterInputStream 856 implements StreamLimiter { 857 private long curPos = 0; 858 private long markPos = -1; 859 private long limitPos = Long.MAX_VALUE; 860 861 public PositionTrackingInputStream(InputStream is) { 862 super(is); 863 } 864 865 private void checkLimit(long amt) throws IOException { 866 long extra = (curPos + amt) - limitPos; 867 if (extra > 0) { 868 throw new IOException("Tried to read " + amt + " byte(s) past " + 869 "the limit at offset " + limitPos); 870 } 871 } 872 873 @Override 874 public int read() throws IOException { 875 checkLimit(1); 876 int ret = super.read(); 877 if (ret != -1) curPos++; 878 return ret; 879 } 880 881 @Override 882 public int read(byte[] data) throws IOException { 883 checkLimit(data.length); 884 int ret = super.read(data); 885 if (ret > 0) curPos += ret; 886 return ret; 887 } 888 889 @Override 890 public int read(byte[] data, int offset, int length) throws IOException { 891 checkLimit(length); 892 int ret = super.read(data, offset, length); 893 if (ret > 0) curPos += ret; 894 return ret; 895 } 896 897 @Override 898 public void setLimit(long limit) { 899 limitPos = curPos + limit; 900 } 901 902 @Override 903 public void clearLimit() { 904 limitPos = Long.MAX_VALUE; 905 } 906 907 @Override 908 public void mark(int limit) { 909 super.mark(limit); 910 markPos = curPos; 911 } 912 913 @Override 914 public void reset() throws IOException { 915 if (markPos == -1) { 916 throw new IOException("Not marked!"); 917 } 918 super.reset(); 919 curPos = markPos; 920 markPos = -1; 921 } 922 923 public long getPos() { 924 return curPos; 925 } 926 927 @Override 928 public long skip(long amt) throws IOException { 929 long extra = (curPos + amt) - limitPos; 930 if (extra > 0) { 931 throw new IOException("Tried to skip " + extra + " bytes past " + 932 "the limit at offset " + limitPos); 933 } 934 long ret = super.skip(amt); 935 curPos += ret; 936 return ret; 937 } 938 } 939 940 public long getLastAppliedTxId() { 941 return lastAppliedTxId; 942 } 943 944 /** 945 * Creates a Step used for updating startup progress, populated with 946 * information from the given edits. The step always includes the log's name. 947 * If the log has a known length, then the length is included in the step too. 
   *
   * @param edits EditLogInputStream to use for populating step
   * @return Step populated with information from edits
   * @throws IOException thrown if there is an I/O error
   */
  private static Step createStartupProgressStep(EditLogInputStream edits)
      throws IOException {
    long length = edits.length();
    String name = edits.getCurrentStreamName();
    return length != -1 ? new Step(name, length) : new Step(name);
  }
}