/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
import static org.apache.hadoop.util.Time.now;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetStoragePolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.util.ChunkedArrayList;
import org.apache.hadoop.hdfs.util.Holder;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;

@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec

  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;
  /** Total number of edit log transactions loaded. */
  private int totalEdits = 0;

  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
      throws IOException {
    return loadFSEdits(edits, expectedStartingTxId, null, null);
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(edits);
    prog.beginStep(Phase.LOADING_EDITS, step);
    fsNamesys.writeLock();
    try {
      long startTime = now();
      FSImage.LOG.info("Start loading edits file " + edits.getName());
      long numEdits = loadEditRecords(edits, false, expectedStartingTxId,
          startOpt, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (now() - startTime) / 1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock();
      prog.endStep(Phase.LOADING_EDITS, step);
    }
  }

  /**
   * Replay edit records from the stream, applying each operation to the
   * in-memory namespace and tracking the transaction IDs seen.
   *
   * @return the number of edits applied from this stream
   */
  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
      long expectedStartingTxId, StartupOption startOpt,
      MetaRecoveryContext recovery) throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

    long recentOpcodeOffsets[] = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = now();
    long lastInodeId = fsNamesys.getLastInodeId();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(true), e);
            String errorMessage =
                formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] =
              in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears "
                  + "to be a gap in the edit log. We expected txid "
                  + expectedTxId + ", but got txid "
                  + op.getTransactionId() + ".", recovery,
                  "ignoring missing transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears "
                  + "to be an out-of-order edit in the edit log. We "
                  + "expected txid " + expectedTxId + ", but got txid "
                  + op.getTransactionId() + ".", recovery,
                  "skipping the out-of-order edit");
              continue;
            }
          }
          try {
            if (LOG.isTraceEnabled()) {
              LOG.trace("op=" + op + ", startOpt=" + startOpt
                  + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
            }
            long inodeId = applyEditLogOp(op, fsDir, startOpt,
                in.getVersion(true), lastInodeId);
            if (lastInodeId < inodeId) {
              lastInodeId = inodeId;
            }
          } catch (RollingUpgradeOp.RollbackException e) {
            throw e;
          } catch (Throwable e) {
            LOG.error("Encountered exception on operation " + op, e);
            if (recovery == null) {
              throw e instanceof IOException ? (IOException) e : new IOException(e);
            }

            MetaRecoveryContext.editLogLoaderPrompt("Failed to "
                + "apply edit log operation " + op + ": error "
                + e.getMessage(), recovery, "applying edits");
          }
          // Now that the operation has been successfully decoded and
          // applied, update our bookkeeping.
          incrOpCount(op.opCode, opCounts, step, counter);
          if (op.hasTransactionId()) {
            lastAppliedTxId = op.getTransactionId();
            expectedTxId = lastAppliedTxId + 1;
          } else {
            expectedTxId = lastAppliedTxId = expectedStartingTxId;
          }
          // log progress
          if (op.hasTransactionId()) {
            long now = now();
            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
              int percent = Math.round((float) deltaTxId / numTxns * 100);
              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                  + " transactions completed. (" + percent + "%)");
              lastLogTime = now;
            }
          }
          numEdits++;
          totalEdits++;
        } catch (RollingUpgradeOp.RollbackException e) {
          LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
          break;
        } catch (MetaRecoveryContext.RequestStopException e) {
          MetaRecoveryContext.LOG.warn("Stopped reading edit log at "
              + in.getPosition() + "/" + in.length());
          break;
        }
      }
    } finally {
      fsNamesys.resetLastInodeId(lastInodeId);
      if (closeOnExit) {
        in.close();
      }
      fsDir.writeUnlock();
      fsNamesys.writeUnlock();

      if (LOG.isTraceEnabled()) {
        LOG.trace("replaying edit log finished");
      }

      if (FSImage.LOG.isDebugEnabled()) {
        dumpOpCounts(opCounts);
      }
    }
    return numEdits;
  }

  // allocate and update last allocated inode id
  private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
      long lastInodeId) throws IOException {
    long inodeId = inodeIdFromOp;

    if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.ADD_INODE_ID, logVersion)) {
        throw new IOException("The layout version " + logVersion
            + " supports inodeId but gave bogus inodeId");
      }
      inodeId = fsNamesys.allocateNewInodeId();
    } else {
      // Need to reset lastInodeId: fsNamesys gets lastInodeId first from the
      // fsimage, but the editlog may capture more recent inodeId allocations.
      if (inodeId > lastInodeId) {
        fsNamesys.resetLastInodeId(inodeId);
      }
    }
    return inodeId;
  }

  /**
   * Apply a single edit log operation to the namespace.
   *
   * @return the inode id allocated or observed while applying the operation,
   *         or INodeId.GRANDFATHER_INODE_ID if the operation allocates none
   */
  @SuppressWarnings("deprecation")
  private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
      StartupOption startOpt, int logVersion, long lastInodeId) throws IOException {
    long inodeId = INodeId.GRANDFATHER_INODE_ID;
    if (LOG.isTraceEnabled()) {
      LOG.trace("replaying edit log: " + op);
    }
    final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();

    switch (op.opCode) {
    case OP_ADD: {
      AddCloseOp addCloseOp = (AddCloseOp) op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path
            + " numblocks : " + addCloseOp.blocks.length
            + " clientHolder " + addCloseOp.clientName
            + " clientMachine " + addCloseOp.clientMachine);
      }
      // There are three cases here:
      // 1. OP_ADD to create a new file
      // 2. OP_ADD to update file blocks
      // 3. OP_ADD to open file for append

      // See if the file already exists (persistBlocks call)
      final INodesInPath iip = fsDir.getINodesInPath(path, true);
      final INode[] inodes = iip.getINodes();
      INodeFile oldFile = INodeFile.valueOf(
          inodes[inodes.length - 1], path, true);
      if (oldFile != null && addCloseOp.overwrite) {
        // This is OP_ADD with overwrite
        fsDir.unprotectedDelete(path, addCloseOp.mtime);
        oldFile = null;
      }
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager()
            .adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
            lastInodeId);
        newFile = fsDir.unprotectedAddFile(inodeId,
            path, addCloseOp.permissions, addCloseOp.aclEntries,
            addCloseOp.xAttrs,
            replication, addCloseOp.mtime, addCloseOp.atime,
            addCloseOp.blockSize, true, addCloseOp.clientName,
            addCloseOp.clientMachine, addCloseOp.storagePolicyId);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);

        // add the op into the retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
              HdfsFileStatus.EMPTY_NAME, newFile,
              BlockStoragePolicySuite.ID_UNSPECIFIED, Snapshot.CURRENT_STATE_ID,
              false, iip);
          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
              addCloseOp.rpcCallId, stat);
        }
      } else { // This is OP_ADD on an existing file
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file "
                + "for append");
          }
          LocatedBlock lb = fsNamesys.prepareFileForWrite(path,
              iip, addCloseOp.clientName, addCloseOp.clientMachine, false, false);
          newFile = INodeFile.valueOf(fsDir.getINode(path),
              path, true);

          // add the op into the retry cache if necessary
          if (toAddRetryCache) {
            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
                addCloseOp.rpcCallId, lb);
          }
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp) op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path
            + " numblocks : " + addCloseOp.blocks.length
            + " clientHolder " + addCloseOp.clientName
            + " clientMachine " + addCloseOp.clientMachine);
      }

      final INodesInPath iip = fsDir.getLastINodeInPath(path);
      final INodeFile file = INodeFile.valueOf(iip.getINode(0), path);

      // Update the salient file attributes.
      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, file);

      // Now close the file
      if (!file.isUnderConstruction()
          && logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (file.isUnderConstruction()) {
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
        file.toCompleteFile(file.getModificationTime());
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp) op;
      final String path =
          renameReservedPathsOnUpgrade(updateOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path
            + " numblocks : " + updateOp.blocks.length);
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path),
          path);
      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, oldFile);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
      }
      break;
    }
    case OP_ADD_BLOCK: {
      AddBlockOp addBlockOp = (AddBlockOp) op;
      String path = renameReservedPathsOnUpgrade(addBlockOp.getPath(), logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path
            + " new block id : " + addBlockOp.getLastBlock().getBlockId());
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path);
      // add the new block to the INodeFile
      addNewBlock(fsDir, addBlockOp, oldFile);
      break;
    }
    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp) op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      fsDir.unprotectedSetReplication(
          renameReservedPathsOnUpgrade(setReplicationOp.path, logVersion),
          replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp) op;
      String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
      String[] srcs = new String[concatDeleteOp.srcs.length];
      for (int i = 0; i < srcs.length; i++) {
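        // Each source path may collide with a name that is reserved in the
        // new layout, so remap it the same way the concat target was remapped.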
        srcs[i] =
            renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
      }
      fsDir.unprotectedConcat(trg, srcs, concatDeleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
            concatDeleteOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp) op;
      final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
      final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
      fsDir.unprotectedRenameTo(src, dst,
          renameOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp) op;
      fsDir.unprotectedDelete(
          renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
          deleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
      }
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp) op;
      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedMkdir(inodeId,
          renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
          mkdirOp.permissions, mkdirOp.aclEntries, mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP_V1: {
      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op) op;
      fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp) op;
      fsDir.unprotectedSetPermission(
          renameReservedPathsOnUpgrade(setPermissionsOp.src, logVersion),
          setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp) op;
      fsDir.unprotectedSetOwner(
          renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
          setOwnerOp.username, setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
          setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
          HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET);
      break;
    }

    case OP_SET_QUOTA:
      SetQuotaOp setQuotaOp = (SetQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
          setQuotaOp.nsQuota, setQuotaOp.dsQuota);
      break;

    case OP_TIMES: {
      TimesOp timesOp = (TimesOp) op;

      fsDir.unprotectedSetTimes(
          renameReservedPathsOnUpgrade(timesOp.path, logVersion),
          timesOp.mtime, timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      if (!FileSystem.areSymlinksEnabled()) {
        throw new IOException("Symlinks not supported - please remove symlink "
            + "before upgrading to this version of HDFS");
      }
      SymlinkOp symlinkOp = (SymlinkOp) op;
      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedAddSymlink(inodeId,
          renameReservedPathsOnUpgrade(symlinkOp.path, logVersion),
          symlinkOp.value, symlinkOp.mtime, symlinkOp.atime,
          symlinkOp.permissionStatus);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp) op;
      fsDir.unprotectedRenameTo(
          renameReservedPathsOnUpgrade(renameOp.src, logVersion),
          renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
          renameOp.timestamp, renameOp.options);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_GET_DELEGATION_TOKEN: {
      GetDelegationTokenOp getDelegationTokenOp
          = (GetDelegationTokenOp) op;

      fsNamesys.getDelegationTokenSecretManager()
          .addPersistedDelegationToken(getDelegationTokenOp.token,
              getDelegationTokenOp.expiryTime);
      break;
    }
    case OP_RENEW_DELEGATION_TOKEN: {
      RenewDelegationTokenOp renewDelegationTokenOp
          = (RenewDelegationTokenOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
              renewDelegationTokenOp.expiryTime);
      break;
    }
    case OP_CANCEL_DELEGATION_TOKEN: {
      CancelDelegationTokenOp cancelDelegationTokenOp
          = (CancelDelegationTokenOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenCancellation(
              cancelDelegationTokenOp.token);
      break;
    }
    case OP_UPDATE_MASTER_KEY: {
      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedMasterKey(updateMasterKeyOp.key);
      break;
    }
    case OP_REASSIGN_LEASE: {
      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp) op;

      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
      final String path =
          renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
      INodeFile pendingFile = fsDir.getINode(path).asFile();
      Preconditions.checkState(pendingFile.isUnderConstruction());
      fsNamesys.reassignLeaseInternal(lease,
          path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
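      // Segment boundary markers only delimit edit log files; replay just
      // counts the opcode and moves on.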
      break;
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
              logVersion);
      String path = fsNamesys.getSnapshotManager().createSnapshot(
          snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
      }
      break;
    }
    case OP_DELETE_SNAPSHOT: {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ChunkedArrayList<INode>();
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().deleteSnapshot(
          snapshotRoot, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
      collectedBlocks.clear();
      fsNamesys.dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
            deleteSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().renameSnapshot(
          snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
            renameSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
      fsNamesys.getSnapshotManager().setSnapshottable(
          snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().resetSnapshottable(
          snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {
      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
      fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
      break;
    }
    case OP_ALLOCATE_BLOCK_ID: {
      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
      fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
      break;
    }
    case OP_ROLLING_UPGRADE_START: {
      if (startOpt == StartupOption.ROLLINGUPGRADE) {
        final RollingUpgradeStartupOption rollingUpgradeOpt
            = startOpt.getRollingUpgradeStartupOption();
        if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
          throw new RollingUpgradeOp.RollbackException();
        } else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
          // ignore upgrade marker
          break;
        }
      }
      // start rolling upgrade
      final long startTime = ((RollingUpgradeOp) op).getTime();
      fsNamesys.startRollingUpgradeInternal(startTime);
      fsNamesys.triggerRollbackCheckpoint();
      break;
    }
    case OP_ROLLING_UPGRADE_FINALIZE: {
      final long finalizeTime = ((RollingUpgradeOp) op).getTime();
      if (fsNamesys.isRollingUpgrade()) {
        // Only do it when NN is actually doing rolling upgrade.
        // We can get FINALIZE without corresponding START, if NN is restarted
        // before this op is consumed and a new checkpoint is created.
        fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
      }
      fsNamesys.getFSImage().updateStorageVersion();
      fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
      break;
    }
    case OP_ADD_CACHE_DIRECTIVE: {
      AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op;
      CacheDirectiveInfo result = fsNamesys
          .getCacheManager().addDirectiveFromEditLog(addOp.directive);
      if (toAddRetryCache) {
        Long id = result.getId();
        fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id);
      }
      break;
    }
    case OP_MODIFY_CACHE_DIRECTIVE: {
      ModifyCacheDirectiveInfoOp modifyOp =
          (ModifyCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().modifyDirectiveFromEditLog(
          modifyOp.directive);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_DIRECTIVE: {
      RemoveCacheDirectiveInfoOp removeOp =
          (RemoveCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().removeDirective(removeOp.id, null);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_ADD_CACHE_POOL: {
      AddCachePoolOp addOp = (AddCachePoolOp) op;
      fsNamesys.getCacheManager().addCachePool(addOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_MODIFY_CACHE_POOL: {
      ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op;
      fsNamesys.getCacheManager().modifyCachePool(modifyOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_POOL: {
      RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op;
      fsNamesys.getCacheManager().removeCachePool(removeOp.poolName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_SET_ACL: {
      SetAclOp setAclOp = (SetAclOp) op;
      fsDir.unprotectedSetAcl(setAclOp.src, setAclOp.aclEntries);
      break;
    }
    case OP_SET_XATTR: {
      SetXAttrOp setXAttrOp = (SetXAttrOp) op;
      fsDir.unprotectedSetXAttrs(setXAttrOp.src, setXAttrOp.xAttrs,
          EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(setXAttrOp.rpcClientId, setXAttrOp.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_XATTR: {
      RemoveXAttrOp removeXAttrOp = (RemoveXAttrOp) op;
      fsDir.unprotectedRemoveXAttrs(removeXAttrOp.src,
          removeXAttrOp.xAttrs);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(removeXAttrOp.rpcClientId,
            removeXAttrOp.rpcCallId);
      }
      break;
    }
    case OP_SET_STORAGE_POLICY: {
      SetStoragePolicyOp setStoragePolicyOp = (SetStoragePolicyOp) op;
      fsDir.unprotectedSetStoragePolicy(
          renameReservedPathsOnUpgrade(setStoragePolicyOp.path, logVersion),
          setStoragePolicyOp.policyId);
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
    return inodeId;
  }

  private static String formatEditLogReplayError(EditLogInputStream in,
      long recentOpcodeOffsets[], long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset " + in.getPosition());
    sb.append(". Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

  /**
   * Add a new block into the given INodeFile
   */
  private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file)
      throws IOException {
    BlockInfo[] oldBlocks = file.getBlocks();
    Block pBlock = op.getPenultimateBlock();
    Block newBlock = op.getLastBlock();

    if (pBlock != null) { // the penultimate block is not null
      Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0);
      // compare pBlock with the last block of oldBlocks
      Block oldLastBlock = oldBlocks[oldBlocks.length - 1];
      if (oldLastBlock.getBlockId() != pBlock.getBlockId()
          || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) {
        throw new IOException(
            "Mismatched block IDs or generation stamps for the old last block of file "
            + op.getPath() + ", the old last block is " + oldLastBlock
            + ", and the block read from editlog is " + pBlock);
      }

      oldLastBlock.setNumBytes(pBlock.getNumBytes());
      if (oldLastBlock instanceof BlockInfoUnderConstruction) {
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldLastBlock);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock);
      }
    } else { // the penultimate block is null
      Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0);
    }
    // add the new block
    BlockInfo newBI = new BlockInfoUnderConstruction(
        newBlock, file.getBlockReplication());
    fsNamesys.getBlockManager().addBlockCollection(newBI, file);
    file.addBlock(newBI);
    fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();

    // Are we only updating the last block's gen stamp?
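    // When the incoming list has the same length as the current one, the only
    // change this op may carry is a new generation stamp (and length) on the
    // last block; the loop below enforces that invariant.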
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId()
          || (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp()
              && !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, "
            + "attempting to replace block " + oldBlock + " with " + newBlock
            + " as block # " + i + "/" + newBlocks.length + " of "
            + path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
          oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoUnderConstruction
          && (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file "
            + path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path, file, oldBlock);
      if (!removed && !(op instanceof UpdateBlocksOp)) {
        throw new IOException("Trying to delete non-existent block " + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n  ");
    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

  /** Increment the per-opcode count and the startup progress counter. */
  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
      Counter counter) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
    counter.increment();
  }

  /**
   * Throw appropriate exception during upgrade from 203, when editlog loading
   * could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 release has conflicting opcodes with the later releases.
    // The editlog must be emptied by restarting the namenode, before proceeding
    // with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old "
          + "release and restart the namenode. This empties the editlog "
          + "and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it.
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid
            + " ops from " + in + " while determining its valid length."
            + " Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

  /**
   * Like {@link #validateEditLog(EditLogInputStream)}, scan the stream to
   * find the last transaction ID, resyncing past unreadable sections.
   * This reads through the stream but does not close it.
   */
  static EditLogValidation scanEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) { // TODO
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid
            + " ops from " + in + " while determining its valid length."
+ 1088 "Position was " + lastPos, t); 1089 in.resync(); 1090 FSImage.LOG.warn("After resync, position is " + in.getPosition()); 1091 continue; 1092 } 1093 if (lastTxId == HdfsConstants.INVALID_TXID 1094 || op.getTransactionId() > lastTxId) { 1095 lastTxId = op.getTransactionId(); 1096 } 1097 numValid++; 1098 } 1099 return new EditLogValidation(lastPos, lastTxId, false); 1100 } 1101 1102 static class EditLogValidation { 1103 private final long validLength; 1104 private final long endTxId; 1105 private final boolean hasCorruptHeader; 1106 1107 EditLogValidation(long validLength, long endTxId, 1108 boolean hasCorruptHeader) { 1109 this.validLength = validLength; 1110 this.endTxId = endTxId; 1111 this.hasCorruptHeader = hasCorruptHeader; 1112 } 1113 1114 long getValidLength() { return validLength; } 1115 1116 long getEndTxId() { return endTxId; } 1117 1118 boolean hasCorruptHeader() { return hasCorruptHeader; } 1119 } 1120 1121 /** 1122 * Stream wrapper that keeps track of the current stream position. 1123 * 1124 * This stream also allows us to set a limit on how many bytes we can read 1125 * without getting an exception. 1126 */ 1127 public static class PositionTrackingInputStream extends FilterInputStream 1128 implements StreamLimiter { 1129 private long curPos = 0; 1130 private long markPos = -1; 1131 private long limitPos = Long.MAX_VALUE; 1132 1133 public PositionTrackingInputStream(InputStream is) { 1134 super(is); 1135 } 1136 1137 private void checkLimit(long amt) throws IOException { 1138 long extra = (curPos + amt) - limitPos; 1139 if (extra > 0) { 1140 throw new IOException("Tried to read " + amt + " byte(s) past " + 1141 "the limit at offset " + limitPos); 1142 } 1143 } 1144 1145 @Override 1146 public int read() throws IOException { 1147 checkLimit(1); 1148 int ret = super.read(); 1149 if (ret != -1) curPos++; 1150 return ret; 1151 } 1152 1153 @Override 1154 public int read(byte[] data) throws IOException { 1155 checkLimit(data.length); 1156 int ret = super.read(data); 1157 if (ret > 0) curPos += ret; 1158 return ret; 1159 } 1160 1161 @Override 1162 public int read(byte[] data, int offset, int length) throws IOException { 1163 checkLimit(length); 1164 int ret = super.read(data, offset, length); 1165 if (ret > 0) curPos += ret; 1166 return ret; 1167 } 1168 1169 @Override 1170 public void setLimit(long limit) { 1171 limitPos = curPos + limit; 1172 } 1173 1174 @Override 1175 public void clearLimit() { 1176 limitPos = Long.MAX_VALUE; 1177 } 1178 1179 @Override 1180 public void mark(int limit) { 1181 super.mark(limit); 1182 markPos = curPos; 1183 } 1184 1185 @Override 1186 public void reset() throws IOException { 1187 if (markPos == -1) { 1188 throw new IOException("Not marked!"); 1189 } 1190 super.reset(); 1191 curPos = markPos; 1192 markPos = -1; 1193 } 1194 1195 public long getPos() { 1196 return curPos; 1197 } 1198 1199 @Override 1200 public long skip(long amt) throws IOException { 1201 long extra = (curPos + amt) - limitPos; 1202 if (extra > 0) { 1203 throw new IOException("Tried to skip " + extra + " bytes past " + 1204 "the limit at offset " + limitPos); 1205 } 1206 long ret = super.skip(amt); 1207 curPos += ret; 1208 return ret; 1209 } 1210 } 1211 1212 public long getLastAppliedTxId() { 1213 return lastAppliedTxId; 1214 } 1215 1216 /** 1217 * Creates a Step used for updating startup progress, populated with 1218 * information from the given edits. The step always includes the log's name. 1219 * If the log has a known length, then the length is included in the step too. 
   *
   * @param edits EditLogInputStream to use for populating step
   * @return Step populated with information from edits
   * @throws IOException thrown if there is an I/O error
   */
  private static Step createStartupProgressStep(EditLogInputStream edits)
      throws IOException {
    long length = edits.length();
    String name = edits.getCurrentStreamName();
    return length != -1 ? new Step(name, length) : new Step(name);
  }
}