001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.hdfs.server.namenode; 019 020 import static org.apache.hadoop.util.Time.now; 021 022 import java.io.File; 023 import java.io.FilterInputStream; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.util.Arrays; 027 import java.util.EnumMap; 028 029 import org.apache.commons.logging.Log; 030 import org.apache.commons.logging.LogFactory; 031 import org.apache.hadoop.classification.InterfaceAudience; 032 import org.apache.hadoop.classification.InterfaceStability; 033 import org.apache.hadoop.hdfs.protocol.Block; 034 import org.apache.hadoop.hdfs.protocol.HdfsConstants; 035 import org.apache.hadoop.hdfs.protocol.LayoutVersion; 036 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; 037 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; 038 import org.apache.hadoop.hdfs.server.common.Storage; 039 import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream.LogHeaderCorruptException; 040 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp; 041 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp; 042 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp; 043 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp; 044 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp; 045 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp; 046 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp; 047 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; 048 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp; 049 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp; 050 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp; 051 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp; 052 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampOp; 053 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp; 054 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp; 055 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp; 056 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp; 057 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp; 058 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp; 059 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp; 060 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp; 061 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; 062 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 063 import org.apache.hadoop.hdfs.util.Holder; 064 065 import com.google.common.base.Joiner; 066 067 @InterfaceAudience.Private 068 @InterfaceStability.Evolving 069 public class FSEditLogLoader { 070 static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName()); 071 static long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec 072 private final FSNamesystem fsNamesys; 073 private long lastAppliedTxId; 074 075 public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) { 076 this.fsNamesys = fsNamesys; 077 this.lastAppliedTxId = lastAppliedTxId; 078 } 079 080 /** 081 * Load an edit log, and apply the changes to the in-memory structure 082 * This is where we apply edits that we've been writing to disk all 083 * along. 084 */ 085 long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, 086 MetaRecoveryContext recovery) throws IOException { 087 fsNamesys.writeLock(); 088 try { 089 long startTime = now(); 090 long numEdits = loadEditRecords(edits, false, 091 expectedStartingTxId, recovery); 092 FSImage.LOG.info("Edits file " + edits.getName() 093 + " of size " + edits.length() + " edits # " + numEdits 094 + " loaded in " + (now()-startTime)/1000 + " seconds"); 095 return numEdits; 096 } finally { 097 edits.close(); 098 fsNamesys.writeUnlock(); 099 } 100 } 101 102 long loadEditRecords(EditLogInputStream in, boolean closeOnExit, 103 long expectedStartingTxId, MetaRecoveryContext recovery) 104 throws IOException { 105 FSDirectory fsDir = fsNamesys.dir; 106 107 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts = 108 new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class); 109 110 if (LOG.isTraceEnabled()) { 111 LOG.trace("Acquiring write lock to replay edit log"); 112 } 113 114 fsNamesys.writeLock(); 115 fsDir.writeLock(); 116 117 long recentOpcodeOffsets[] = new long[4]; 118 Arrays.fill(recentOpcodeOffsets, -1); 119 120 long expectedTxId = expectedStartingTxId; 121 long numEdits = 0; 122 long lastTxId = in.getLastTxId(); 123 long numTxns = (lastTxId - expectedStartingTxId) + 1; 124 long lastLogTime = now(); 125 126 try { 127 while (true) { 128 try { 129 FSEditLogOp op; 130 try { 131 op = in.readOp(); 132 if (op == null) { 133 break; 134 } 135 } catch (Throwable e) { 136 // Handle a problem with our input 137 check203UpgradeFailure(in.getVersion(), e); 138 String errorMessage = 139 formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId); 140 FSImage.LOG.error(errorMessage, e); 141 if (recovery == null) { 142 // We will only try to skip over problematic opcodes when in 143 // recovery mode. 144 throw new EditLogInputException(errorMessage, e, numEdits); 145 } 146 MetaRecoveryContext.editLogLoaderPrompt( 147 "We failed to read txId " + expectedTxId, 148 recovery, "skipping the bad section in the log"); 149 in.resync(); 150 continue; 151 } 152 recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] = 153 in.getPosition(); 154 if (op.hasTransactionId()) { 155 if (op.getTransactionId() > expectedTxId) { 156 MetaRecoveryContext.editLogLoaderPrompt("There appears " + 157 "to be a gap in the edit log. We expected txid " + 158 expectedTxId + ", but got txid " + 159 op.getTransactionId() + ".", recovery, "ignoring missing " + 160 " transaction IDs"); 161 } else if (op.getTransactionId() < expectedTxId) { 162 MetaRecoveryContext.editLogLoaderPrompt("There appears " + 163 "to be an out-of-order edit in the edit log. We " + 164 "expected txid " + expectedTxId + ", but got txid " + 165 op.getTransactionId() + ".", recovery, 166 "skipping the out-of-order edit"); 167 continue; 168 } 169 } 170 try { 171 applyEditLogOp(op, fsDir, in.getVersion()); 172 } catch (Throwable e) { 173 LOG.error("Encountered exception on operation " + op, e); 174 MetaRecoveryContext.editLogLoaderPrompt("Failed to " + 175 "apply edit log operation " + op + ": error " + 176 e.getMessage(), recovery, "applying edits"); 177 } 178 // Now that the operation has been successfully decoded and 179 // applied, update our bookkeeping. 180 incrOpCount(op.opCode, opCounts); 181 if (op.hasTransactionId()) { 182 lastAppliedTxId = op.getTransactionId(); 183 expectedTxId = lastAppliedTxId + 1; 184 } else { 185 expectedTxId = lastAppliedTxId = expectedStartingTxId; 186 } 187 // log progress 188 if (op.hasTransactionId()) { 189 long now = now(); 190 if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) { 191 long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1; 192 int percent = Math.round((float) deltaTxId / numTxns * 100); 193 LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns 194 + " transactions completed. (" + percent + "%)"); 195 lastLogTime = now; 196 } 197 } 198 numEdits++; 199 } catch (MetaRecoveryContext.RequestStopException e) { 200 MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + 201 in.getPosition() + "/" + in.length()); 202 break; 203 } 204 } 205 } finally { 206 if(closeOnExit) { 207 in.close(); 208 } 209 fsDir.writeUnlock(); 210 fsNamesys.writeUnlock(); 211 212 if (LOG.isTraceEnabled()) { 213 LOG.trace("replaying edit log finished"); 214 } 215 216 if (FSImage.LOG.isDebugEnabled()) { 217 dumpOpCounts(opCounts); 218 } 219 } 220 return numEdits; 221 } 222 223 @SuppressWarnings("deprecation") 224 private void applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, 225 int logVersion) throws IOException { 226 227 if (LOG.isTraceEnabled()) { 228 LOG.trace("replaying edit log: " + op); 229 } 230 231 switch (op.opCode) { 232 case OP_ADD: { 233 AddCloseOp addCloseOp = (AddCloseOp)op; 234 if (FSNamesystem.LOG.isDebugEnabled()) { 235 FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + 236 " numblocks : " + addCloseOp.blocks.length + 237 " clientHolder " + addCloseOp.clientName + 238 " clientMachine " + addCloseOp.clientMachine); 239 } 240 // There three cases here: 241 // 1. OP_ADD to create a new file 242 // 2. OP_ADD to update file blocks 243 // 3. OP_ADD to open file for append 244 245 // See if the file already exists (persistBlocks call) 246 INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); 247 INodeFile newFile = oldFile; 248 if (oldFile == null) { // this is OP_ADD on a new file (case 1) 249 // versions > 0 support per file replication 250 // get name and replication 251 final short replication = fsNamesys.getBlockManager( 252 ).adjustReplication(addCloseOp.replication); 253 assert addCloseOp.blocks.length == 0; 254 255 // add to the file tree 256 newFile = (INodeFile)fsDir.unprotectedAddFile( 257 addCloseOp.path, addCloseOp.permissions, 258 replication, addCloseOp.mtime, 259 addCloseOp.atime, addCloseOp.blockSize, 260 true, addCloseOp.clientName, addCloseOp.clientMachine); 261 fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path); 262 263 } else { // This is OP_ADD on an existing file 264 if (!oldFile.isUnderConstruction()) { 265 // This is case 3: a call to append() on an already-closed file. 266 if (FSNamesystem.LOG.isDebugEnabled()) { 267 FSNamesystem.LOG.debug("Reopening an already-closed file " + 268 "for append"); 269 } 270 fsNamesys.prepareFileForWrite(addCloseOp.path, oldFile, 271 addCloseOp.clientName, addCloseOp.clientMachine, null, 272 false); 273 newFile = getINodeFile(fsDir, addCloseOp.path); 274 } 275 } 276 // Fall-through for case 2. 277 // Regardless of whether it's a new file or an updated file, 278 // update the block list. 279 280 // Update the salient file attributes. 281 newFile.setAccessTime(addCloseOp.atime); 282 newFile.setModificationTimeForce(addCloseOp.mtime); 283 updateBlocks(fsDir, addCloseOp, newFile); 284 break; 285 } 286 case OP_CLOSE: { 287 AddCloseOp addCloseOp = (AddCloseOp)op; 288 289 if (FSNamesystem.LOG.isDebugEnabled()) { 290 FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + 291 " numblocks : " + addCloseOp.blocks.length + 292 " clientHolder " + addCloseOp.clientName + 293 " clientMachine " + addCloseOp.clientMachine); 294 } 295 296 INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); 297 if (oldFile == null) { 298 throw new IOException("Operation trying to close non-existent file " + 299 addCloseOp.path); 300 } 301 302 // Update the salient file attributes. 303 oldFile.setAccessTime(addCloseOp.atime); 304 oldFile.setModificationTimeForce(addCloseOp.mtime); 305 updateBlocks(fsDir, addCloseOp, oldFile); 306 307 // Now close the file 308 if (!oldFile.isUnderConstruction() && 309 logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) { 310 // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE 311 // could show up twice in a row. But after that version, this 312 // should be fixed, so we should treat it as an error. 313 throw new IOException( 314 "File is not under construction: " + addCloseOp.path); 315 } 316 // One might expect that you could use removeLease(holder, path) here, 317 // but OP_CLOSE doesn't serialize the holder. So, remove by path. 318 if (oldFile.isUnderConstruction()) { 319 INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; 320 fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); 321 INodeFile newFile = ucFile.convertToInodeFile(); 322 fsDir.replaceNode(addCloseOp.path, ucFile, newFile); 323 } 324 break; 325 } 326 case OP_UPDATE_BLOCKS: { 327 UpdateBlocksOp updateOp = (UpdateBlocksOp)op; 328 if (FSNamesystem.LOG.isDebugEnabled()) { 329 FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path + 330 " numblocks : " + updateOp.blocks.length); 331 } 332 INodeFile oldFile = getINodeFile(fsDir, updateOp.path); 333 if (oldFile == null) { 334 throw new IOException( 335 "Operation trying to update blocks in non-existent file " + 336 updateOp.path); 337 } 338 339 // Update in-memory data structures 340 updateBlocks(fsDir, updateOp, oldFile); 341 break; 342 } 343 344 case OP_SET_REPLICATION: { 345 SetReplicationOp setReplicationOp = (SetReplicationOp)op; 346 short replication = fsNamesys.getBlockManager().adjustReplication( 347 setReplicationOp.replication); 348 fsDir.unprotectedSetReplication(setReplicationOp.path, 349 replication, null); 350 break; 351 } 352 case OP_CONCAT_DELETE: { 353 ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op; 354 fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs, 355 concatDeleteOp.timestamp); 356 break; 357 } 358 case OP_RENAME_OLD: { 359 RenameOldOp renameOp = (RenameOldOp)op; 360 fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, 361 renameOp.timestamp); 362 break; 363 } 364 case OP_DELETE: { 365 DeleteOp deleteOp = (DeleteOp)op; 366 fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp); 367 break; 368 } 369 case OP_MKDIR: { 370 MkdirOp mkdirOp = (MkdirOp)op; 371 fsDir.unprotectedMkdir(mkdirOp.path, mkdirOp.permissions, 372 mkdirOp.timestamp); 373 break; 374 } 375 case OP_SET_GENSTAMP: { 376 SetGenstampOp setGenstampOp = (SetGenstampOp)op; 377 fsNamesys.setGenerationStamp(setGenstampOp.genStamp); 378 break; 379 } 380 case OP_SET_PERMISSIONS: { 381 SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op; 382 fsDir.unprotectedSetPermission(setPermissionsOp.src, 383 setPermissionsOp.permissions); 384 break; 385 } 386 case OP_SET_OWNER: { 387 SetOwnerOp setOwnerOp = (SetOwnerOp)op; 388 fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username, 389 setOwnerOp.groupname); 390 break; 391 } 392 case OP_SET_NS_QUOTA: { 393 SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op; 394 fsDir.unprotectedSetQuota(setNSQuotaOp.src, 395 setNSQuotaOp.nsQuota, 396 HdfsConstants.QUOTA_DONT_SET); 397 break; 398 } 399 case OP_CLEAR_NS_QUOTA: { 400 ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op; 401 fsDir.unprotectedSetQuota(clearNSQuotaOp.src, 402 HdfsConstants.QUOTA_RESET, 403 HdfsConstants.QUOTA_DONT_SET); 404 break; 405 } 406 407 case OP_SET_QUOTA: 408 SetQuotaOp setQuotaOp = (SetQuotaOp)op; 409 fsDir.unprotectedSetQuota(setQuotaOp.src, 410 setQuotaOp.nsQuota, 411 setQuotaOp.dsQuota); 412 break; 413 414 case OP_TIMES: { 415 TimesOp timesOp = (TimesOp)op; 416 417 fsDir.unprotectedSetTimes(timesOp.path, 418 timesOp.mtime, 419 timesOp.atime, true); 420 break; 421 } 422 case OP_SYMLINK: { 423 SymlinkOp symlinkOp = (SymlinkOp)op; 424 fsDir.unprotectedAddSymlink(symlinkOp.path, symlinkOp.value, 425 symlinkOp.mtime, symlinkOp.atime, 426 symlinkOp.permissionStatus); 427 break; 428 } 429 case OP_RENAME: { 430 RenameOp renameOp = (RenameOp)op; 431 fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, 432 renameOp.timestamp, renameOp.options); 433 break; 434 } 435 case OP_GET_DELEGATION_TOKEN: { 436 GetDelegationTokenOp getDelegationTokenOp 437 = (GetDelegationTokenOp)op; 438 439 fsNamesys.getDelegationTokenSecretManager() 440 .addPersistedDelegationToken(getDelegationTokenOp.token, 441 getDelegationTokenOp.expiryTime); 442 break; 443 } 444 case OP_RENEW_DELEGATION_TOKEN: { 445 RenewDelegationTokenOp renewDelegationTokenOp 446 = (RenewDelegationTokenOp)op; 447 fsNamesys.getDelegationTokenSecretManager() 448 .updatePersistedTokenRenewal(renewDelegationTokenOp.token, 449 renewDelegationTokenOp.expiryTime); 450 break; 451 } 452 case OP_CANCEL_DELEGATION_TOKEN: { 453 CancelDelegationTokenOp cancelDelegationTokenOp 454 = (CancelDelegationTokenOp)op; 455 fsNamesys.getDelegationTokenSecretManager() 456 .updatePersistedTokenCancellation( 457 cancelDelegationTokenOp.token); 458 break; 459 } 460 case OP_UPDATE_MASTER_KEY: { 461 UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op; 462 fsNamesys.getDelegationTokenSecretManager() 463 .updatePersistedMasterKey(updateMasterKeyOp.key); 464 break; 465 } 466 case OP_REASSIGN_LEASE: { 467 ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op; 468 469 Lease lease = fsNamesys.leaseManager.getLease( 470 reassignLeaseOp.leaseHolder); 471 INodeFileUnderConstruction pendingFile = 472 INodeFileUnderConstruction.valueOf( 473 fsDir.getINode(reassignLeaseOp.path), reassignLeaseOp.path); 474 fsNamesys.reassignLeaseInternal(lease, 475 reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile); 476 break; 477 } 478 case OP_START_LOG_SEGMENT: 479 case OP_END_LOG_SEGMENT: { 480 // no data in here currently. 481 break; 482 } 483 default: 484 throw new IOException("Invalid operation read " + op.opCode); 485 } 486 } 487 488 private static String formatEditLogReplayError(EditLogInputStream in, 489 long recentOpcodeOffsets[], long txid) { 490 StringBuilder sb = new StringBuilder(); 491 sb.append("Error replaying edit log at offset " + in.getPosition()); 492 sb.append(". Expected transaction ID was ").append(txid); 493 if (recentOpcodeOffsets[0] != -1) { 494 Arrays.sort(recentOpcodeOffsets); 495 sb.append("\nRecent opcode offsets:"); 496 for (long offset : recentOpcodeOffsets) { 497 if (offset != -1) { 498 sb.append(' ').append(offset); 499 } 500 } 501 } 502 return sb.toString(); 503 } 504 505 private static INodeFile getINodeFile(FSDirectory fsDir, String path) 506 throws IOException { 507 INode inode = fsDir.getINode(path); 508 if (inode != null) { 509 if (!(inode instanceof INodeFile)) { 510 throw new IOException("Operation trying to get non-file " + path); 511 } 512 } 513 return (INodeFile)inode; 514 } 515 516 /** 517 * Update in-memory data structures with new block information. 518 * @throws IOException 519 */ 520 private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op, 521 INodeFile file) throws IOException { 522 // Update its block list 523 BlockInfo[] oldBlocks = file.getBlocks(); 524 Block[] newBlocks = op.getBlocks(); 525 String path = op.getPath(); 526 527 // Are we only updating the last block's gen stamp. 528 boolean isGenStampUpdate = oldBlocks.length == newBlocks.length; 529 530 // First, update blocks in common 531 for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) { 532 BlockInfo oldBlock = oldBlocks[i]; 533 Block newBlock = newBlocks[i]; 534 535 boolean isLastBlock = i == newBlocks.length - 1; 536 if (oldBlock.getBlockId() != newBlock.getBlockId() || 537 (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && 538 !(isGenStampUpdate && isLastBlock))) { 539 throw new IOException("Mismatched block IDs or generation stamps, " + 540 "attempting to replace block " + oldBlock + " with " + newBlock + 541 " as block # " + i + "/" + newBlocks.length + " of " + 542 path); 543 } 544 545 oldBlock.setNumBytes(newBlock.getNumBytes()); 546 boolean changeMade = 547 oldBlock.getGenerationStamp() != newBlock.getGenerationStamp(); 548 oldBlock.setGenerationStamp(newBlock.getGenerationStamp()); 549 550 if (oldBlock instanceof BlockInfoUnderConstruction && 551 (!isLastBlock || op.shouldCompleteLastBlock())) { 552 changeMade = true; 553 fsNamesys.getBlockManager().forceCompleteBlock( 554 (INodeFileUnderConstruction)file, 555 (BlockInfoUnderConstruction)oldBlock); 556 } 557 if (changeMade) { 558 // The state or gen-stamp of the block has changed. So, we may be 559 // able to process some messages from datanodes that we previously 560 // were unable to process. 561 fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); 562 } 563 } 564 565 if (newBlocks.length < oldBlocks.length) { 566 // We're removing a block from the file, e.g. abandonBlock(...) 567 if (!file.isUnderConstruction()) { 568 throw new IOException("Trying to remove a block from file " + 569 path + " which is not under construction."); 570 } 571 if (newBlocks.length != oldBlocks.length - 1) { 572 throw new IOException("Trying to remove more than one block from file " 573 + path); 574 } 575 fsDir.unprotectedRemoveBlock(path, 576 (INodeFileUnderConstruction)file, oldBlocks[oldBlocks.length - 1]); 577 } else if (newBlocks.length > oldBlocks.length) { 578 // We're adding blocks 579 for (int i = oldBlocks.length; i < newBlocks.length; i++) { 580 Block newBlock = newBlocks[i]; 581 BlockInfo newBI; 582 if (!op.shouldCompleteLastBlock()) { 583 // TODO: shouldn't this only be true for the last block? 584 // what about an old-version fsync() where fsync isn't called 585 // until several blocks in? 586 newBI = new BlockInfoUnderConstruction( 587 newBlock, file.getBlockReplication()); 588 } else { 589 // OP_CLOSE should add finalized blocks. This code path 590 // is only executed when loading edits written by prior 591 // versions of Hadoop. Current versions always log 592 // OP_ADD operations as each block is allocated. 593 newBI = new BlockInfo(newBlock, file.getBlockReplication()); 594 } 595 fsNamesys.getBlockManager().addBlockCollection(newBI, file); 596 file.addBlock(newBI); 597 fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); 598 } 599 } 600 } 601 602 private static void dumpOpCounts( 603 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) { 604 StringBuilder sb = new StringBuilder(); 605 sb.append("Summary of operations loaded from edit log:\n "); 606 Joiner.on("\n ").withKeyValueSeparator("=").appendTo(sb, opCounts); 607 FSImage.LOG.debug(sb.toString()); 608 } 609 610 private void incrOpCount(FSEditLogOpCodes opCode, 611 EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) { 612 Holder<Integer> holder = opCounts.get(opCode); 613 if (holder == null) { 614 holder = new Holder<Integer>(1); 615 opCounts.put(opCode, holder); 616 } else { 617 holder.held++; 618 } 619 } 620 621 /** 622 * Throw appropriate exception during upgrade from 203, when editlog loading 623 * could fail due to opcode conflicts. 624 */ 625 private void check203UpgradeFailure(int logVersion, Throwable e) 626 throws IOException { 627 // 0.20.203 version version has conflicting opcodes with the later releases. 628 // The editlog must be emptied by restarting the namenode, before proceeding 629 // with the upgrade. 630 if (Storage.is203LayoutVersion(logVersion) 631 && logVersion != HdfsConstants.LAYOUT_VERSION) { 632 String msg = "During upgrade failed to load the editlog version " 633 + logVersion + " from release 0.20.203. Please go back to the old " 634 + " release and restart the namenode. This empties the editlog " 635 + " and saves the namespace. Resume the upgrade after this step."; 636 throw new IOException(msg, e); 637 } 638 } 639 640 /** 641 * Find the last valid transaction ID in the stream. 642 * If there are invalid or corrupt transactions in the middle of the stream, 643 * validateEditLog will skip over them. 644 * This reads through the stream but does not close it. 645 * 646 * @throws IOException if the stream cannot be read due to an IO error (eg 647 * if the log does not exist) 648 */ 649 static EditLogValidation validateEditLog(EditLogInputStream in) { 650 long lastPos = 0; 651 long lastTxId = HdfsConstants.INVALID_TXID; 652 long numValid = 0; 653 FSEditLogOp op = null; 654 while (true) { 655 lastPos = in.getPosition(); 656 try { 657 if ((op = in.readOp()) == null) { 658 break; 659 } 660 } catch (Throwable t) { 661 FSImage.LOG.warn("Caught exception after reading " + numValid + 662 " ops from " + in + " while determining its valid length." + 663 "Position was " + lastPos, t); 664 in.resync(); 665 FSImage.LOG.warn("After resync, position is " + in.getPosition()); 666 continue; 667 } 668 if (lastTxId == HdfsConstants.INVALID_TXID 669 || op.getTransactionId() > lastTxId) { 670 lastTxId = op.getTransactionId(); 671 } 672 numValid++; 673 } 674 return new EditLogValidation(lastPos, lastTxId, false); 675 } 676 677 static class EditLogValidation { 678 private final long validLength; 679 private final long endTxId; 680 private final boolean hasCorruptHeader; 681 682 EditLogValidation(long validLength, long endTxId, 683 boolean hasCorruptHeader) { 684 this.validLength = validLength; 685 this.endTxId = endTxId; 686 this.hasCorruptHeader = hasCorruptHeader; 687 } 688 689 long getValidLength() { return validLength; } 690 691 long getEndTxId() { return endTxId; } 692 693 boolean hasCorruptHeader() { return hasCorruptHeader; } 694 } 695 696 /** 697 * Stream wrapper that keeps track of the current stream position. 698 * 699 * This stream also allows us to set a limit on how many bytes we can read 700 * without getting an exception. 701 */ 702 public static class PositionTrackingInputStream extends FilterInputStream 703 implements StreamLimiter { 704 private long curPos = 0; 705 private long markPos = -1; 706 private long limitPos = Long.MAX_VALUE; 707 708 public PositionTrackingInputStream(InputStream is) { 709 super(is); 710 } 711 712 private void checkLimit(long amt) throws IOException { 713 long extra = (curPos + amt) - limitPos; 714 if (extra > 0) { 715 throw new IOException("Tried to read " + amt + " byte(s) past " + 716 "the limit at offset " + limitPos); 717 } 718 } 719 720 @Override 721 public int read() throws IOException { 722 checkLimit(1); 723 int ret = super.read(); 724 if (ret != -1) curPos++; 725 return ret; 726 } 727 728 @Override 729 public int read(byte[] data) throws IOException { 730 checkLimit(data.length); 731 int ret = super.read(data); 732 if (ret > 0) curPos += ret; 733 return ret; 734 } 735 736 @Override 737 public int read(byte[] data, int offset, int length) throws IOException { 738 checkLimit(length); 739 int ret = super.read(data, offset, length); 740 if (ret > 0) curPos += ret; 741 return ret; 742 } 743 744 @Override 745 public void setLimit(long limit) { 746 limitPos = curPos + limit; 747 } 748 749 @Override 750 public void clearLimit() { 751 limitPos = Long.MAX_VALUE; 752 } 753 754 @Override 755 public void mark(int limit) { 756 super.mark(limit); 757 markPos = curPos; 758 } 759 760 @Override 761 public void reset() throws IOException { 762 if (markPos == -1) { 763 throw new IOException("Not marked!"); 764 } 765 super.reset(); 766 curPos = markPos; 767 markPos = -1; 768 } 769 770 public long getPos() { 771 return curPos; 772 } 773 774 @Override 775 public long skip(long amt) throws IOException { 776 long extra = (curPos + amt) - limitPos; 777 if (extra > 0) { 778 throw new IOException("Tried to skip " + extra + " bytes past " + 779 "the limit at offset " + limitPos); 780 } 781 long ret = super.skip(amt); 782 curPos += ret; 783 return ret; 784 } 785 } 786 787 public long getLastAppliedTxId() { 788 return lastAppliedTxId; 789 } 790 }