001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.hdfs.server.namenode; 019 020 import static org.apache.hadoop.util.Time.now; 021 022 import java.io.File; 023 import java.io.FilterInputStream; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.EnumMap; 029 import java.util.List; 030 031 import org.apache.commons.logging.Log; 032 import org.apache.commons.logging.LogFactory; 033 import org.apache.hadoop.classification.InterfaceAudience; 034 import org.apache.hadoop.classification.InterfaceStability; 035 import org.apache.hadoop.hdfs.protocol.Block; 036 import org.apache.hadoop.hdfs.protocol.HdfsConstants; 037 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 038 import org.apache.hadoop.hdfs.protocol.LayoutVersion; 039 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; 040 import org.apache.hadoop.hdfs.protocol.LocatedBlock; 041 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; 042 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; 043 import org.apache.hadoop.hdfs.server.common.Storage; 044 import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream.LogHeaderCorruptException; 045 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp; 046 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp; 047 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp; 048 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp; 049 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp; 050 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp; 051 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp; 052 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp; 053 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp; 054 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp; 055 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp; 056 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp; 057 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; 058 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp; 059 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp; 060 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp; 061 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp; 062 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp; 063 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op; 064 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op; 065 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp; 066 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp; 067 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp; 068 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp; 069 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp; 070 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp; 071 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp; 072 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp; 073 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; 074 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; 075 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 076 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 077 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 078 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 079 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 080 import org.apache.hadoop.hdfs.util.Holder; 081 082 import com.google.common.base.Joiner; 083 084 @InterfaceAudience.Private 085 @InterfaceStability.Evolving 086 public class FSEditLogLoader { 087 static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName()); 088 static long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec 089 private final FSNamesystem fsNamesys; 090 private long lastAppliedTxId; 091 092 public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) { 093 this.fsNamesys = fsNamesys; 094 this.lastAppliedTxId = lastAppliedTxId; 095 } 096 097 /** 098 * Load an edit log, and apply the changes to the in-memory structure 099 * This is where we apply edits that we've been writing to disk all 100 * along. 101 */ 102 long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, 103 MetaRecoveryContext recovery) throws IOException { 104 StartupProgress prog = NameNode.getStartupProgress(); 105 Step step = createStartupProgressStep(edits); 106 prog.beginStep(Phase.LOADING_EDITS, step); 107 fsNamesys.writeLock(); 108 try { 109 long startTime = now(); 110 FSImage.LOG.info("Start loading edits file " + edits.getName()); 111 long numEdits = loadEditRecords(edits, false, 112 expectedStartingTxId, recovery); 113 FSImage.LOG.info("Edits file " + edits.getName() 114 + " of size " + edits.length() + " edits # " + numEdits 115 + " loaded in " + (now()-startTime)/1000 + " seconds"); 116 return numEdits; 117 } finally { 118 edits.close(); 119 fsNamesys.writeUnlock(); 120 prog.endStep(Phase.LOADING_EDITS, step); 121 } 122 } 123 124 long loadEditRecords(EditLogInputStream in, boolean closeOnExit, 125 long expectedStartingTxId, MetaRecoveryContext recovery) 126 throws IOException { 127 FSDirectory fsDir = fsNamesys.dir; 128 129 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts = 130 new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class); 131 132 if (LOG.isTraceEnabled()) { 133 LOG.trace("Acquiring write lock to replay edit log"); 134 } 135 136 fsNamesys.writeLock(); 137 fsDir.writeLock(); 138 139 long recentOpcodeOffsets[] = new long[4]; 140 Arrays.fill(recentOpcodeOffsets, -1); 141 142 long expectedTxId = expectedStartingTxId; 143 long numEdits = 0; 144 long lastTxId = in.getLastTxId(); 145 long numTxns = (lastTxId - expectedStartingTxId) + 1; 146 StartupProgress prog = NameNode.getStartupProgress(); 147 Step step = createStartupProgressStep(in); 148 prog.setTotal(Phase.LOADING_EDITS, step, numTxns); 149 Counter counter = prog.getCounter(Phase.LOADING_EDITS, step); 150 long lastLogTime = now(); 151 long lastInodeId = fsNamesys.getLastInodeId(); 152 153 try { 154 while (true) { 155 try { 156 FSEditLogOp op; 157 try { 158 op = in.readOp(); 159 if (op == null) { 160 break; 161 } 162 } catch (Throwable e) { 163 // Handle a problem with our input 164 check203UpgradeFailure(in.getVersion(), e); 165 String errorMessage = 166 formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId); 167 FSImage.LOG.error(errorMessage, e); 168 if (recovery == null) { 169 // We will only try to skip over problematic opcodes when in 170 // recovery mode. 171 throw new EditLogInputException(errorMessage, e, numEdits); 172 } 173 MetaRecoveryContext.editLogLoaderPrompt( 174 "We failed to read txId " + expectedTxId, 175 recovery, "skipping the bad section in the log"); 176 in.resync(); 177 continue; 178 } 179 recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] = 180 in.getPosition(); 181 if (op.hasTransactionId()) { 182 if (op.getTransactionId() > expectedTxId) { 183 MetaRecoveryContext.editLogLoaderPrompt("There appears " + 184 "to be a gap in the edit log. We expected txid " + 185 expectedTxId + ", but got txid " + 186 op.getTransactionId() + ".", recovery, "ignoring missing " + 187 " transaction IDs"); 188 } else if (op.getTransactionId() < expectedTxId) { 189 MetaRecoveryContext.editLogLoaderPrompt("There appears " + 190 "to be an out-of-order edit in the edit log. We " + 191 "expected txid " + expectedTxId + ", but got txid " + 192 op.getTransactionId() + ".", recovery, 193 "skipping the out-of-order edit"); 194 continue; 195 } 196 } 197 try { 198 long inodeId = applyEditLogOp(op, fsDir, in.getVersion(), lastInodeId); 199 if (lastInodeId < inodeId) { 200 lastInodeId = inodeId; 201 } 202 } catch (Throwable e) { 203 LOG.error("Encountered exception on operation " + op, e); 204 MetaRecoveryContext.editLogLoaderPrompt("Failed to " + 205 "apply edit log operation " + op + ": error " + 206 e.getMessage(), recovery, "applying edits"); 207 } 208 // Now that the operation has been successfully decoded and 209 // applied, update our bookkeeping. 210 incrOpCount(op.opCode, opCounts, step, counter); 211 if (op.hasTransactionId()) { 212 lastAppliedTxId = op.getTransactionId(); 213 expectedTxId = lastAppliedTxId + 1; 214 } else { 215 expectedTxId = lastAppliedTxId = expectedStartingTxId; 216 } 217 // log progress 218 if (op.hasTransactionId()) { 219 long now = now(); 220 if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) { 221 long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1; 222 int percent = Math.round((float) deltaTxId / numTxns * 100); 223 LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns 224 + " transactions completed. (" + percent + "%)"); 225 lastLogTime = now; 226 } 227 } 228 numEdits++; 229 } catch (MetaRecoveryContext.RequestStopException e) { 230 MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + 231 in.getPosition() + "/" + in.length()); 232 break; 233 } 234 } 235 } finally { 236 fsNamesys.resetLastInodeId(lastInodeId); 237 if(closeOnExit) { 238 in.close(); 239 } 240 fsDir.writeUnlock(); 241 fsNamesys.writeUnlock(); 242 243 if (LOG.isTraceEnabled()) { 244 LOG.trace("replaying edit log finished"); 245 } 246 247 if (FSImage.LOG.isDebugEnabled()) { 248 dumpOpCounts(opCounts); 249 } 250 } 251 return numEdits; 252 } 253 254 // allocate and update last allocated inode id 255 private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion, 256 long lastInodeId) throws IOException { 257 long inodeId = inodeIdFromOp; 258 259 if (inodeId == INodeId.GRANDFATHER_INODE_ID) { 260 if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) { 261 throw new IOException("The layout version " + logVersion 262 + " supports inodeId but gave bogus inodeId"); 263 } 264 inodeId = fsNamesys.allocateNewInodeId(); 265 } else { 266 // need to reset lastInodeId. fsnamesys gets lastInodeId firstly from 267 // fsimage but editlog captures more recent inodeId allocations 268 if (inodeId > lastInodeId) { 269 fsNamesys.resetLastInodeId(inodeId); 270 } 271 } 272 return inodeId; 273 } 274 275 @SuppressWarnings("deprecation") 276 private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, 277 int logVersion, long lastInodeId) throws IOException { 278 long inodeId = INodeId.GRANDFATHER_INODE_ID; 279 if (LOG.isTraceEnabled()) { 280 LOG.trace("replaying edit log: " + op); 281 } 282 final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds(); 283 284 switch (op.opCode) { 285 case OP_ADD: { 286 AddCloseOp addCloseOp = (AddCloseOp)op; 287 if (FSNamesystem.LOG.isDebugEnabled()) { 288 FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + 289 " numblocks : " + addCloseOp.blocks.length + 290 " clientHolder " + addCloseOp.clientName + 291 " clientMachine " + addCloseOp.clientMachine); 292 } 293 // There three cases here: 294 // 1. OP_ADD to create a new file 295 // 2. OP_ADD to update file blocks 296 // 3. OP_ADD to open file for append 297 298 // See if the file already exists (persistBlocks call) 299 final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path); 300 final INodeFile oldFile = INodeFile.valueOf( 301 iip.getINode(0), addCloseOp.path, true); 302 INodeFile newFile = oldFile; 303 if (oldFile == null) { // this is OP_ADD on a new file (case 1) 304 // versions > 0 support per file replication 305 // get name and replication 306 final short replication = fsNamesys.getBlockManager() 307 .adjustReplication(addCloseOp.replication); 308 assert addCloseOp.blocks.length == 0; 309 310 // add to the file tree 311 inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion, 312 lastInodeId); 313 newFile = fsDir.unprotectedAddFile(inodeId, 314 addCloseOp.path, addCloseOp.permissions, replication, 315 addCloseOp.mtime, addCloseOp.atime, addCloseOp.blockSize, true, 316 addCloseOp.clientName, addCloseOp.clientMachine); 317 fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path); 318 319 // add the op into retry cache if necessary 320 if (toAddRetryCache) { 321 HdfsFileStatus stat = fsNamesys.dir.createFileStatus( 322 HdfsFileStatus.EMPTY_NAME, newFile, null); 323 fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId, 324 addCloseOp.rpcCallId, stat); 325 } 326 } else { // This is OP_ADD on an existing file 327 if (!oldFile.isUnderConstruction()) { 328 // This is case 3: a call to append() on an already-closed file. 329 if (FSNamesystem.LOG.isDebugEnabled()) { 330 FSNamesystem.LOG.debug("Reopening an already-closed file " + 331 "for append"); 332 } 333 LocatedBlock lb = fsNamesys.prepareFileForWrite(addCloseOp.path, 334 oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null, 335 false, iip.getLatestSnapshot(), false); 336 newFile = INodeFile.valueOf(fsDir.getINode(addCloseOp.path), 337 addCloseOp.path, true); 338 339 // add the op into retry cache is necessary 340 if (toAddRetryCache) { 341 fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId, 342 addCloseOp.rpcCallId, lb); 343 } 344 } 345 } 346 // Fall-through for case 2. 347 // Regardless of whether it's a new file or an updated file, 348 // update the block list. 349 350 // Update the salient file attributes. 351 newFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap()); 352 newFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap()); 353 updateBlocks(fsDir, addCloseOp, newFile); 354 break; 355 } 356 case OP_CLOSE: { 357 AddCloseOp addCloseOp = (AddCloseOp)op; 358 359 if (FSNamesystem.LOG.isDebugEnabled()) { 360 FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + 361 " numblocks : " + addCloseOp.blocks.length + 362 " clientHolder " + addCloseOp.clientName + 363 " clientMachine " + addCloseOp.clientMachine); 364 } 365 366 final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path); 367 final INodeFile oldFile = INodeFile.valueOf(iip.getINode(0), addCloseOp.path); 368 369 // Update the salient file attributes. 370 oldFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap()); 371 oldFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap()); 372 updateBlocks(fsDir, addCloseOp, oldFile); 373 374 // Now close the file 375 if (!oldFile.isUnderConstruction() && 376 logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) { 377 // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE 378 // could show up twice in a row. But after that version, this 379 // should be fixed, so we should treat it as an error. 380 throw new IOException( 381 "File is not under construction: " + addCloseOp.path); 382 } 383 // One might expect that you could use removeLease(holder, path) here, 384 // but OP_CLOSE doesn't serialize the holder. So, remove by path. 385 if (oldFile.isUnderConstruction()) { 386 INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; 387 fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); 388 INodeFile newFile = ucFile.toINodeFile(ucFile.getModificationTime()); 389 fsDir.unprotectedReplaceINodeFile(addCloseOp.path, ucFile, newFile); 390 } 391 break; 392 } 393 case OP_UPDATE_BLOCKS: { 394 UpdateBlocksOp updateOp = (UpdateBlocksOp)op; 395 if (FSNamesystem.LOG.isDebugEnabled()) { 396 FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path + 397 " numblocks : " + updateOp.blocks.length); 398 } 399 INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(updateOp.path), 400 updateOp.path); 401 // Update in-memory data structures 402 updateBlocks(fsDir, updateOp, oldFile); 403 404 if (toAddRetryCache) { 405 fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId); 406 } 407 break; 408 } 409 410 case OP_SET_REPLICATION: { 411 SetReplicationOp setReplicationOp = (SetReplicationOp)op; 412 short replication = fsNamesys.getBlockManager().adjustReplication( 413 setReplicationOp.replication); 414 fsDir.unprotectedSetReplication(setReplicationOp.path, 415 replication, null); 416 break; 417 } 418 case OP_CONCAT_DELETE: { 419 ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op; 420 fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs, 421 concatDeleteOp.timestamp); 422 423 if (toAddRetryCache) { 424 fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId, 425 concatDeleteOp.rpcCallId); 426 } 427 break; 428 } 429 case OP_RENAME_OLD: { 430 RenameOldOp renameOp = (RenameOldOp)op; 431 fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, 432 renameOp.timestamp); 433 434 if (toAddRetryCache) { 435 fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId); 436 } 437 break; 438 } 439 case OP_DELETE: { 440 DeleteOp deleteOp = (DeleteOp)op; 441 fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp); 442 443 if (toAddRetryCache) { 444 fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId); 445 } 446 break; 447 } 448 case OP_MKDIR: { 449 MkdirOp mkdirOp = (MkdirOp)op; 450 inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion, 451 lastInodeId); 452 fsDir.unprotectedMkdir(inodeId, mkdirOp.path, mkdirOp.permissions, 453 mkdirOp.timestamp); 454 break; 455 } 456 case OP_SET_GENSTAMP_V1: { 457 SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op)op; 458 fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1); 459 break; 460 } 461 case OP_SET_PERMISSIONS: { 462 SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op; 463 fsDir.unprotectedSetPermission(setPermissionsOp.src, 464 setPermissionsOp.permissions); 465 break; 466 } 467 case OP_SET_OWNER: { 468 SetOwnerOp setOwnerOp = (SetOwnerOp)op; 469 fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username, 470 setOwnerOp.groupname); 471 break; 472 } 473 case OP_SET_NS_QUOTA: { 474 SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op; 475 fsDir.unprotectedSetQuota(setNSQuotaOp.src, 476 setNSQuotaOp.nsQuota, 477 HdfsConstants.QUOTA_DONT_SET); 478 break; 479 } 480 case OP_CLEAR_NS_QUOTA: { 481 ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op; 482 fsDir.unprotectedSetQuota(clearNSQuotaOp.src, 483 HdfsConstants.QUOTA_RESET, 484 HdfsConstants.QUOTA_DONT_SET); 485 break; 486 } 487 488 case OP_SET_QUOTA: 489 SetQuotaOp setQuotaOp = (SetQuotaOp)op; 490 fsDir.unprotectedSetQuota(setQuotaOp.src, 491 setQuotaOp.nsQuota, 492 setQuotaOp.dsQuota); 493 break; 494 495 case OP_TIMES: { 496 TimesOp timesOp = (TimesOp)op; 497 498 fsDir.unprotectedSetTimes(timesOp.path, 499 timesOp.mtime, 500 timesOp.atime, true); 501 break; 502 } 503 case OP_SYMLINK: { 504 SymlinkOp symlinkOp = (SymlinkOp)op; 505 inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion, 506 lastInodeId); 507 fsDir.unprotectedAddSymlink(inodeId, symlinkOp.path, 508 symlinkOp.value, symlinkOp.mtime, 509 symlinkOp.atime, symlinkOp.permissionStatus); 510 511 if (toAddRetryCache) { 512 fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId); 513 } 514 break; 515 } 516 case OP_RENAME: { 517 RenameOp renameOp = (RenameOp)op; 518 fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, 519 renameOp.timestamp, renameOp.options); 520 521 if (toAddRetryCache) { 522 fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId); 523 } 524 break; 525 } 526 case OP_GET_DELEGATION_TOKEN: { 527 GetDelegationTokenOp getDelegationTokenOp 528 = (GetDelegationTokenOp)op; 529 530 fsNamesys.getDelegationTokenSecretManager() 531 .addPersistedDelegationToken(getDelegationTokenOp.token, 532 getDelegationTokenOp.expiryTime); 533 break; 534 } 535 case OP_RENEW_DELEGATION_TOKEN: { 536 RenewDelegationTokenOp renewDelegationTokenOp 537 = (RenewDelegationTokenOp)op; 538 fsNamesys.getDelegationTokenSecretManager() 539 .updatePersistedTokenRenewal(renewDelegationTokenOp.token, 540 renewDelegationTokenOp.expiryTime); 541 break; 542 } 543 case OP_CANCEL_DELEGATION_TOKEN: { 544 CancelDelegationTokenOp cancelDelegationTokenOp 545 = (CancelDelegationTokenOp)op; 546 fsNamesys.getDelegationTokenSecretManager() 547 .updatePersistedTokenCancellation( 548 cancelDelegationTokenOp.token); 549 break; 550 } 551 case OP_UPDATE_MASTER_KEY: { 552 UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op; 553 fsNamesys.getDelegationTokenSecretManager() 554 .updatePersistedMasterKey(updateMasterKeyOp.key); 555 break; 556 } 557 case OP_REASSIGN_LEASE: { 558 ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op; 559 560 Lease lease = fsNamesys.leaseManager.getLease( 561 reassignLeaseOp.leaseHolder); 562 INodeFileUnderConstruction pendingFile = 563 INodeFileUnderConstruction.valueOf( 564 fsDir.getINode(reassignLeaseOp.path), reassignLeaseOp.path); 565 fsNamesys.reassignLeaseInternal(lease, 566 reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile); 567 break; 568 } 569 case OP_START_LOG_SEGMENT: 570 case OP_END_LOG_SEGMENT: { 571 // no data in here currently. 572 break; 573 } 574 case OP_CREATE_SNAPSHOT: { 575 CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op; 576 String path = fsNamesys.getSnapshotManager().createSnapshot( 577 createSnapshotOp.snapshotRoot, createSnapshotOp.snapshotName); 578 if (toAddRetryCache) { 579 fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId, 580 createSnapshotOp.rpcCallId, path); 581 } 582 break; 583 } 584 case OP_DELETE_SNAPSHOT: { 585 DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op; 586 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); 587 List<INode> removedINodes = new ArrayList<INode>(); 588 fsNamesys.getSnapshotManager().deleteSnapshot( 589 deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName, 590 collectedBlocks, removedINodes); 591 fsNamesys.removeBlocks(collectedBlocks); 592 collectedBlocks.clear(); 593 fsNamesys.dir.removeFromInodeMap(removedINodes); 594 removedINodes.clear(); 595 596 if (toAddRetryCache) { 597 fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId, 598 deleteSnapshotOp.rpcCallId); 599 } 600 break; 601 } 602 case OP_RENAME_SNAPSHOT: { 603 RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op; 604 fsNamesys.getSnapshotManager().renameSnapshot( 605 renameSnapshotOp.snapshotRoot, renameSnapshotOp.snapshotOldName, 606 renameSnapshotOp.snapshotNewName); 607 608 if (toAddRetryCache) { 609 fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId, 610 renameSnapshotOp.rpcCallId); 611 } 612 break; 613 } 614 case OP_ALLOW_SNAPSHOT: { 615 AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op; 616 fsNamesys.getSnapshotManager().setSnapshottable( 617 allowSnapshotOp.snapshotRoot, false); 618 break; 619 } 620 case OP_DISALLOW_SNAPSHOT: { 621 DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op; 622 fsNamesys.getSnapshotManager().resetSnapshottable( 623 disallowSnapshotOp.snapshotRoot); 624 break; 625 } 626 case OP_SET_GENSTAMP_V2: { 627 SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op; 628 fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2); 629 break; 630 } 631 case OP_ALLOCATE_BLOCK_ID: { 632 AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op; 633 fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId); 634 break; 635 } 636 default: 637 throw new IOException("Invalid operation read " + op.opCode); 638 } 639 return inodeId; 640 } 641 642 private static String formatEditLogReplayError(EditLogInputStream in, 643 long recentOpcodeOffsets[], long txid) { 644 StringBuilder sb = new StringBuilder(); 645 sb.append("Error replaying edit log at offset " + in.getPosition()); 646 sb.append(". Expected transaction ID was ").append(txid); 647 if (recentOpcodeOffsets[0] != -1) { 648 Arrays.sort(recentOpcodeOffsets); 649 sb.append("\nRecent opcode offsets:"); 650 for (long offset : recentOpcodeOffsets) { 651 if (offset != -1) { 652 sb.append(' ').append(offset); 653 } 654 } 655 } 656 return sb.toString(); 657 } 658 659 /** 660 * Update in-memory data structures with new block information. 661 * @throws IOException 662 */ 663 private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op, 664 INodeFile file) throws IOException { 665 // Update its block list 666 BlockInfo[] oldBlocks = file.getBlocks(); 667 Block[] newBlocks = op.getBlocks(); 668 String path = op.getPath(); 669 670 // Are we only updating the last block's gen stamp. 671 boolean isGenStampUpdate = oldBlocks.length == newBlocks.length; 672 673 // First, update blocks in common 674 for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) { 675 BlockInfo oldBlock = oldBlocks[i]; 676 Block newBlock = newBlocks[i]; 677 678 boolean isLastBlock = i == newBlocks.length - 1; 679 if (oldBlock.getBlockId() != newBlock.getBlockId() || 680 (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && 681 !(isGenStampUpdate && isLastBlock))) { 682 throw new IOException("Mismatched block IDs or generation stamps, " + 683 "attempting to replace block " + oldBlock + " with " + newBlock + 684 " as block # " + i + "/" + newBlocks.length + " of " + 685 path); 686 } 687 688 oldBlock.setNumBytes(newBlock.getNumBytes()); 689 boolean changeMade = 690 oldBlock.getGenerationStamp() != newBlock.getGenerationStamp(); 691 oldBlock.setGenerationStamp(newBlock.getGenerationStamp()); 692 693 if (oldBlock instanceof BlockInfoUnderConstruction && 694 (!isLastBlock || op.shouldCompleteLastBlock())) { 695 changeMade = true; 696 fsNamesys.getBlockManager().forceCompleteBlock( 697 (INodeFileUnderConstruction)file, 698 (BlockInfoUnderConstruction)oldBlock); 699 } 700 if (changeMade) { 701 // The state or gen-stamp of the block has changed. So, we may be 702 // able to process some messages from datanodes that we previously 703 // were unable to process. 704 fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); 705 } 706 } 707 708 if (newBlocks.length < oldBlocks.length) { 709 // We're removing a block from the file, e.g. abandonBlock(...) 710 if (!file.isUnderConstruction()) { 711 throw new IOException("Trying to remove a block from file " + 712 path + " which is not under construction."); 713 } 714 if (newBlocks.length != oldBlocks.length - 1) { 715 throw new IOException("Trying to remove more than one block from file " 716 + path); 717 } 718 Block oldBlock = oldBlocks[oldBlocks.length - 1]; 719 boolean removed = fsDir.unprotectedRemoveBlock(path, 720 (INodeFileUnderConstruction) file, oldBlock); 721 if (!removed && !(op instanceof UpdateBlocksOp)) { 722 throw new IOException("Trying to delete non-existant block " + oldBlock); 723 } 724 } else if (newBlocks.length > oldBlocks.length) { 725 // We're adding blocks 726 for (int i = oldBlocks.length; i < newBlocks.length; i++) { 727 Block newBlock = newBlocks[i]; 728 BlockInfo newBI; 729 if (!op.shouldCompleteLastBlock()) { 730 // TODO: shouldn't this only be true for the last block? 731 // what about an old-version fsync() where fsync isn't called 732 // until several blocks in? 733 newBI = new BlockInfoUnderConstruction( 734 newBlock, file.getBlockReplication()); 735 } else { 736 // OP_CLOSE should add finalized blocks. This code path 737 // is only executed when loading edits written by prior 738 // versions of Hadoop. Current versions always log 739 // OP_ADD operations as each block is allocated. 740 newBI = new BlockInfo(newBlock, file.getBlockReplication()); 741 } 742 fsNamesys.getBlockManager().addBlockCollection(newBI, file); 743 file.addBlock(newBI); 744 fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); 745 } 746 } 747 } 748 749 private static void dumpOpCounts( 750 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) { 751 StringBuilder sb = new StringBuilder(); 752 sb.append("Summary of operations loaded from edit log:\n "); 753 Joiner.on("\n ").withKeyValueSeparator("=").appendTo(sb, opCounts); 754 FSImage.LOG.debug(sb.toString()); 755 } 756 757 private void incrOpCount(FSEditLogOpCodes opCode, 758 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step, 759 Counter counter) { 760 Holder<Integer> holder = opCounts.get(opCode); 761 if (holder == null) { 762 holder = new Holder<Integer>(1); 763 opCounts.put(opCode, holder); 764 } else { 765 holder.held++; 766 } 767 counter.increment(); 768 } 769 770 /** 771 * Throw appropriate exception during upgrade from 203, when editlog loading 772 * could fail due to opcode conflicts. 773 */ 774 private void check203UpgradeFailure(int logVersion, Throwable e) 775 throws IOException { 776 // 0.20.203 version version has conflicting opcodes with the later releases. 777 // The editlog must be emptied by restarting the namenode, before proceeding 778 // with the upgrade. 779 if (Storage.is203LayoutVersion(logVersion) 780 && logVersion != HdfsConstants.LAYOUT_VERSION) { 781 String msg = "During upgrade failed to load the editlog version " 782 + logVersion + " from release 0.20.203. Please go back to the old " 783 + " release and restart the namenode. This empties the editlog " 784 + " and saves the namespace. Resume the upgrade after this step."; 785 throw new IOException(msg, e); 786 } 787 } 788 789 /** 790 * Find the last valid transaction ID in the stream. 791 * If there are invalid or corrupt transactions in the middle of the stream, 792 * validateEditLog will skip over them. 793 * This reads through the stream but does not close it. 794 * 795 * @throws IOException if the stream cannot be read due to an IO error (eg 796 * if the log does not exist) 797 */ 798 static EditLogValidation validateEditLog(EditLogInputStream in) { 799 long lastPos = 0; 800 long lastTxId = HdfsConstants.INVALID_TXID; 801 long numValid = 0; 802 FSEditLogOp op = null; 803 while (true) { 804 lastPos = in.getPosition(); 805 try { 806 if ((op = in.readOp()) == null) { 807 break; 808 } 809 } catch (Throwable t) { 810 FSImage.LOG.warn("Caught exception after reading " + numValid + 811 " ops from " + in + " while determining its valid length." + 812 "Position was " + lastPos, t); 813 in.resync(); 814 FSImage.LOG.warn("After resync, position is " + in.getPosition()); 815 continue; 816 } 817 if (lastTxId == HdfsConstants.INVALID_TXID 818 || op.getTransactionId() > lastTxId) { 819 lastTxId = op.getTransactionId(); 820 } 821 numValid++; 822 } 823 return new EditLogValidation(lastPos, lastTxId, false); 824 } 825 826 static class EditLogValidation { 827 private final long validLength; 828 private final long endTxId; 829 private final boolean hasCorruptHeader; 830 831 EditLogValidation(long validLength, long endTxId, 832 boolean hasCorruptHeader) { 833 this.validLength = validLength; 834 this.endTxId = endTxId; 835 this.hasCorruptHeader = hasCorruptHeader; 836 } 837 838 long getValidLength() { return validLength; } 839 840 long getEndTxId() { return endTxId; } 841 842 boolean hasCorruptHeader() { return hasCorruptHeader; } 843 } 844 845 /** 846 * Stream wrapper that keeps track of the current stream position. 847 * 848 * This stream also allows us to set a limit on how many bytes we can read 849 * without getting an exception. 850 */ 851 public static class PositionTrackingInputStream extends FilterInputStream 852 implements StreamLimiter { 853 private long curPos = 0; 854 private long markPos = -1; 855 private long limitPos = Long.MAX_VALUE; 856 857 public PositionTrackingInputStream(InputStream is) { 858 super(is); 859 } 860 861 private void checkLimit(long amt) throws IOException { 862 long extra = (curPos + amt) - limitPos; 863 if (extra > 0) { 864 throw new IOException("Tried to read " + amt + " byte(s) past " + 865 "the limit at offset " + limitPos); 866 } 867 } 868 869 @Override 870 public int read() throws IOException { 871 checkLimit(1); 872 int ret = super.read(); 873 if (ret != -1) curPos++; 874 return ret; 875 } 876 877 @Override 878 public int read(byte[] data) throws IOException { 879 checkLimit(data.length); 880 int ret = super.read(data); 881 if (ret > 0) curPos += ret; 882 return ret; 883 } 884 885 @Override 886 public int read(byte[] data, int offset, int length) throws IOException { 887 checkLimit(length); 888 int ret = super.read(data, offset, length); 889 if (ret > 0) curPos += ret; 890 return ret; 891 } 892 893 @Override 894 public void setLimit(long limit) { 895 limitPos = curPos + limit; 896 } 897 898 @Override 899 public void clearLimit() { 900 limitPos = Long.MAX_VALUE; 901 } 902 903 @Override 904 public void mark(int limit) { 905 super.mark(limit); 906 markPos = curPos; 907 } 908 909 @Override 910 public void reset() throws IOException { 911 if (markPos == -1) { 912 throw new IOException("Not marked!"); 913 } 914 super.reset(); 915 curPos = markPos; 916 markPos = -1; 917 } 918 919 public long getPos() { 920 return curPos; 921 } 922 923 @Override 924 public long skip(long amt) throws IOException { 925 long extra = (curPos + amt) - limitPos; 926 if (extra > 0) { 927 throw new IOException("Tried to skip " + extra + " bytes past " + 928 "the limit at offset " + limitPos); 929 } 930 long ret = super.skip(amt); 931 curPos += ret; 932 return ret; 933 } 934 } 935 936 public long getLastAppliedTxId() { 937 return lastAppliedTxId; 938 } 939 940 /** 941 * Creates a Step used for updating startup progress, populated with 942 * information from the given edits. The step always includes the log's name. 943 * If the log has a known length, then the length is included in the step too. 944 * 945 * @param edits EditLogInputStream to use for populating step 946 * @return Step populated with information from edits 947 * @throws IOException thrown if there is an I/O error 948 */ 949 private static Step createStartupProgressStep(EditLogInputStream edits) 950 throws IOException { 951 long length = edits.length(); 952 String name = edits.getCurrentStreamName(); 953 return length != -1 ? new Step(name, length) : new Step(name); 954 } 955 }