/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TruncateOp;
import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
import static org.apache.hadoop.util.Time.monotonicNow;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AppendOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetStoragePolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.util.Holder;
import org.apache.hadoop.util.ChunkedArrayList;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;

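/**
 * Reads an edit log ({@link EditLogInputStream}) and applies each operation
 * it contains to the in-memory namespace. This is used when the NameNode
 * replays edits that were previously written to disk.
 */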
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1 sec

  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;
  /** Total number of edit log transactions loaded. */
  private int totalEdits = 0;

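  /**
   * @param fsNamesys namesystem whose in-memory state the edits are applied to
   * @param lastAppliedTxId transaction ID of the last edit already applied
   */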
  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
      throws IOException {
    return loadFSEdits(edits, expectedStartingTxId, null, null);
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(edits);
    prog.beginStep(Phase.LOADING_EDITS, step);
    fsNamesys.writeLock();
    try {
      long startTime = monotonicNow();
      FSImage.LOG.info("Start loading edits file " + edits.getName());
      long numEdits = loadEditRecords(edits, false, expectedStartingTxId,
          startOpt, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (monotonicNow() - startTime) / 1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock("loadFSEdits");
      prog.endStep(Phase.LOADING_EDITS, step);
    }
  }

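  /**
   * Read operations from the stream and apply them to the namespace one by
   * one until the stream is exhausted. When a gap, an out-of-order edit, or
   * an unreadable operation is encountered, the recovery context (if any) is
   * prompted for how to proceed. Holds the namesystem and directory write
   * locks for the duration of the replay.
   *
   * @return the number of edits applied
   */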
  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
      long expectedStartingTxId, StartupOption startOpt,
      MetaRecoveryContext recovery) throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
      new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

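    // Remember the stream offsets of the most recently read opcodes, so that
    // error messages can point at the neighborhood of a failure.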
    long recentOpcodeOffsets[] = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = monotonicNow();
    long lastInodeId = fsNamesys.dir.getLastInodeId();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(true), e);
            String errorMessage =
              formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] =
            in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be a gap in the edit log.  We expected txid " +
                  expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery, "ignoring missing " +
                  "transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be an out-of-order edit in the edit log.  We " +
                  "expected txid " + expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "skipping the out-of-order edit");
              continue;
            }
          }
          try {
            if (LOG.isTraceEnabled()) {
              LOG.trace("op=" + op + ", startOpt=" + startOpt
                  + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
            }
            long inodeId = applyEditLogOp(op, fsDir, startOpt,
                in.getVersion(true), lastInodeId);
            if (lastInodeId < inodeId) {
              lastInodeId = inodeId;
            }
          } catch (RollingUpgradeOp.RollbackException e) {
            throw e;
          } catch (Throwable e) {
            LOG.error("Encountered exception on operation " + op, e);
            if (recovery == null) {
              throw e instanceof IOException ?
                  (IOException) e : new IOException(e);
            }

            MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
                "apply edit log operation " + op + ": error " +
                e.getMessage(), recovery, "applying edits");
          }
          // Now that the operation has been successfully decoded and
          // applied, update our bookkeeping.
          incrOpCount(op.opCode, opCounts, step, counter);
          if (op.hasTransactionId()) {
            lastAppliedTxId = op.getTransactionId();
            expectedTxId = lastAppliedTxId + 1;
          } else {
            expectedTxId = lastAppliedTxId = expectedStartingTxId;
          }
          // log progress
          if (op.hasTransactionId()) {
            long now = monotonicNow();
            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
              int percent = Math.round((float) deltaTxId / numTxns * 100);
              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                  + " transactions completed. (" + percent + "%)");
              lastLogTime = now;
            }
          }
          numEdits++;
          totalEdits++;
        } catch (RollingUpgradeOp.RollbackException e) {
          LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
          break;
        } catch (MetaRecoveryContext.RequestStopException e) {
          MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
              in.getPosition() + "/" + in.length());
          break;
        }
      }
    } finally {
      fsNamesys.dir.resetLastInodeId(lastInodeId);
      if (closeOnExit) {
        in.close();
      }
      fsDir.writeUnlock();
      fsNamesys.writeUnlock("loadEditRecords");

      if (LOG.isTraceEnabled()) {
        LOG.trace("replaying edit log finished");
      }

      if (FSImage.LOG.isDebugEnabled()) {
        dumpOpCounts(opCounts);
      }
    }
    return numEdits;
  }

  /** Allocate a new inode ID if needed, and update the last allocated ID. */
  private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
      long lastInodeId) throws IOException {
    long inodeId = inodeIdFromOp;

    if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.ADD_INODE_ID, logVersion)) {
        throw new IOException("The layout version " + logVersion
            + " supports inodeId but gave bogus inodeId");
      }
      inodeId = fsNamesys.dir.allocateNewInodeId();
    } else {
      // May need to reset lastInodeId: fsNamesys initially gets lastInodeId
      // from the fsimage, but the edit log captures more recent inodeId
      // allocations.
      if (inodeId > lastInodeId) {
        fsNamesys.dir.resetLastInodeId(inodeId);
      }
    }
    return inodeId;
  }

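  /**
   * Apply a single edit log operation to the namespace.
   *
   * @return the inode ID touched by the operation, or
   *         {@link INodeId#GRANDFATHER_INODE_ID} if the operation is not
   *         associated with a specific inode ID
   */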
  @SuppressWarnings("deprecation")
  private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
      StartupOption startOpt, int logVersion, long lastInodeId) throws IOException {
    long inodeId = INodeId.GRANDFATHER_INODE_ID;
    if (LOG.isTraceEnabled()) {
      LOG.trace("replaying edit log: " + op);
    }
    final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();

    switch (op.opCode) {
    case OP_ADD: {
      AddCloseOp addCloseOp = (AddCloseOp)op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }
      // There are 3 cases here:
      // 1. OP_ADD to create a new file
      // 2. OP_ADD to update file blocks
      // 3. OP_ADD to open file for append (old append)

      // See if the file already exists (persistBlocks call)
      INodesInPath iip = fsDir.getINodesInPath(path, true);
      INodeFile oldFile = INodeFile.valueOf(iip.getLastINode(), path, true);
      if (oldFile != null && addCloseOp.overwrite) {
        // This is OP_ADD with overwrite
        FSDirDeleteOp.deleteForEditLog(fsDir, path, addCloseOp.mtime);
        iip = INodesInPath.replace(iip, iip.length() - 1, null);
        oldFile = null;
      }
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager()
            .adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
            lastInodeId);
        newFile = fsDir.addFileForEditLog(inodeId, iip.getExistingINodes(),
            iip.getLastLocalName(),
            addCloseOp.permissions,
            addCloseOp.aclEntries,
            addCloseOp.xAttrs, replication,
            addCloseOp.mtime, addCloseOp.atime,
            addCloseOp.blockSize, true,
            addCloseOp.clientName,
            addCloseOp.clientMachine,
            addCloseOp.storagePolicyId);
        assert newFile != null;
        iip = INodesInPath.replace(iip, iip.length() - 1, newFile);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);

        // add the op into retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = FSDirStatAndListingOp.createFileStatusForEditLog(
              fsNamesys.dir, path, HdfsFileStatus.EMPTY_NAME,
              BlockStoragePolicySuite.ID_UNSPECIFIED, Snapshot.CURRENT_STATE_ID,
              false, iip);
          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
              addCloseOp.rpcCallId, stat);
        }
      } else { // This is OP_ADD on an existing file (old append)
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file " +
                "for append");
          }
          LocatedBlock lb = fsNamesys.prepareFileForAppend(path, iip,
              addCloseOp.clientName, addCloseOp.clientMachine, false, false,
              false);
          // add the op into retry cache if necessary
          if (toAddRetryCache) {
            HdfsFileStatus stat = FSDirStatAndListingOp.createFileStatusForEditLog(
                fsNamesys.dir, path, HdfsFileStatus.EMPTY_NAME,
                BlockStoragePolicySuite.ID_UNSPECIFIED,
                Snapshot.CURRENT_STATE_ID, false, iip);
            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
                addCloseOp.rpcCallId, new LastBlockWithStatus(lb, stat));
          }
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, iip, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp)op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }

      final INodesInPath iip = fsDir.getINodesInPath(path, true);
      final INodeFile file = INodeFile.valueOf(iip.getLastINode(), path);

      // Update the salient file attributes.
      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, iip, file);

      // Now close the file
      if (!file.isUnderConstruction() &&
          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (file.isUnderConstruction()) {
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
        file.toCompleteFile(file.getModificationTime());
      }
      break;
    }
    case OP_APPEND: {
      AppendOp appendOp = (AppendOp) op;
      final String path = renameReservedPathsOnUpgrade(appendOp.path,
          logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " clientName " + appendOp.clientName +
            " clientMachine " + appendOp.clientMachine +
            " newBlock " + appendOp.newBlock);
      }
      INodesInPath iip = fsDir.getINodesInPath4Write(path);
      INodeFile file = INodeFile.valueOf(iip.getLastINode(), path);
      if (!file.isUnderConstruction()) {
        LocatedBlock lb = fsNamesys.prepareFileForAppend(path, iip,
            appendOp.clientName, appendOp.clientMachine, appendOp.newBlock,
            false, false);
        // add the op into retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = FSDirStatAndListingOp.createFileStatusForEditLog(
              fsNamesys.dir, path, HdfsFileStatus.EMPTY_NAME,
              BlockStoragePolicySuite.ID_UNSPECIFIED,
              Snapshot.CURRENT_STATE_ID, false, iip);
          fsNamesys.addCacheEntryWithPayload(appendOp.rpcClientId,
              appendOp.rpcCallId, new LastBlockWithStatus(lb, stat));
        }
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp)op;
      final String path =
          renameReservedPathsOnUpgrade(updateOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + updateOp.blocks.length);
      }
      INodesInPath iip = fsDir.getINodesInPath(path, true);
      INodeFile oldFile = INodeFile.valueOf(iip.getLastINode(), path);
      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, iip, oldFile);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
      }
      break;
    }
    case OP_ADD_BLOCK: {
      AddBlockOp addBlockOp = (AddBlockOp) op;
      String path = renameReservedPathsOnUpgrade(addBlockOp.getPath(),
          logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " new block id : " + addBlockOp.getLastBlock().getBlockId());
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path);
      // add the new block to the INodeFile
      addNewBlock(fsDir, addBlockOp, oldFile);
      break;
    }
    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp)op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      FSDirAttrOp.unprotectedSetReplication(fsDir, renameReservedPathsOnUpgrade(
          setReplicationOp.path, logVersion), replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
      String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
      String[] srcs = new String[concatDeleteOp.srcs.length];
      for (int i = 0; i < srcs.length; i++) {
        srcs[i] =
            renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
      }
      INodesInPath targetIIP = fsDir.getINodesInPath4Write(trg);
      INodeFile[] srcFiles = new INodeFile[srcs.length];
      for (int i = 0; i < srcs.length; i++) {
        INodesInPath srcIIP = fsDir.getINodesInPath4Write(srcs[i]);
        srcFiles[i] = srcIIP.getLastINode().asFile();
      }
      FSDirConcatOp.unprotectedConcat(fsDir, targetIIP, srcFiles,
          concatDeleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
            concatDeleteOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp)op;
      final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
      final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
      FSDirRenameOp.renameForEditLog(fsDir, src, dst, renameOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp)op;
      FSDirDeleteOp.deleteForEditLog(
          fsDir, renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
          deleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
      }
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp)op;
      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
          lastInodeId);
      FSDirMkdirOp.mkdirForEditLog(fsDir, inodeId,
          renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
          mkdirOp.permissions, mkdirOp.aclEntries, mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP_V1: {
      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op)op;
      fsNamesys.getBlockIdManager().setGenerationStampV1(
          setGenstampV1Op.genStampV1);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
      FSDirAttrOp.unprotectedSetPermission(fsDir, renameReservedPathsOnUpgrade(
          setPermissionsOp.src, logVersion), setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp)op;
      FSDirAttrOp.unprotectedSetOwner(
          fsDir, renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
          setOwnerOp.username, setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
      FSDirAttrOp.unprotectedSetQuota(
          fsDir, renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
          setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET, null);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
      FSDirAttrOp.unprotectedSetQuota(
          fsDir, renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
          HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET, null);
      break;
    }
    case OP_SET_QUOTA: {
      SetQuotaOp setQuotaOp = (SetQuotaOp) op;
      FSDirAttrOp.unprotectedSetQuota(fsDir,
          renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
          setQuotaOp.nsQuota, setQuotaOp.dsQuota, null);
      break;
    }
    case OP_SET_QUOTA_BY_STORAGETYPE: {
      FSEditLogOp.SetQuotaByStorageTypeOp setQuotaByStorageTypeOp =
          (FSEditLogOp.SetQuotaByStorageTypeOp) op;
      FSDirAttrOp.unprotectedSetQuota(fsDir,
          renameReservedPathsOnUpgrade(setQuotaByStorageTypeOp.src, logVersion),
          HdfsConstants.QUOTA_DONT_SET, setQuotaByStorageTypeOp.dsQuota,
          setQuotaByStorageTypeOp.type);
      break;
    }
    case OP_TIMES: {
      TimesOp timesOp = (TimesOp)op;
      FSDirAttrOp.unprotectedSetTimes(
          fsDir, renameReservedPathsOnUpgrade(timesOp.path, logVersion),
          timesOp.mtime, timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      if (!FileSystem.areSymlinksEnabled()) {
        throw new IOException("Symlinks not supported - please remove "
            + "symlink before upgrading to this version of HDFS");
      }
      SymlinkOp symlinkOp = (SymlinkOp)op;
      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
          lastInodeId);
      final String path = renameReservedPathsOnUpgrade(symlinkOp.path,
          logVersion);
      final INodesInPath iip = fsDir.getINodesInPath(path, false);
      FSDirSymlinkOp.unprotectedAddSymlink(fsDir, iip.getExistingINodes(),
          iip.getLastLocalName(), inodeId, symlinkOp.value, symlinkOp.mtime,
          symlinkOp.atime, symlinkOp.permissionStatus);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp)op;
      FSDirRenameOp.renameForEditLog(fsDir,
          renameReservedPathsOnUpgrade(renameOp.src, logVersion),
          renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
          renameOp.timestamp, renameOp.options);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_GET_DELEGATION_TOKEN: {
      GetDelegationTokenOp getDelegationTokenOp
        = (GetDelegationTokenOp)op;

      fsNamesys.getDelegationTokenSecretManager()
        .addPersistedDelegationToken(getDelegationTokenOp.token,
                                     getDelegationTokenOp.expiryTime);
      break;
    }
    case OP_RENEW_DELEGATION_TOKEN: {
      RenewDelegationTokenOp renewDelegationTokenOp
        = (RenewDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
                                     renewDelegationTokenOp.expiryTime);
      break;
    }
    case OP_CANCEL_DELEGATION_TOKEN: {
      CancelDelegationTokenOp cancelDelegationTokenOp
        = (CancelDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenCancellation(
              cancelDelegationTokenOp.token);
      break;
    }
    case OP_UPDATE_MASTER_KEY: {
      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .updatePersistedMasterKey(updateMasterKeyOp.key);
      break;
    }
    case OP_REASSIGN_LEASE: {
      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;

      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
      final String path =
          renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
      INodeFile pendingFile = fsDir.getINode(path).asFile();
      Preconditions.checkState(pendingFile.isUnderConstruction());
      fsNamesys.reassignLeaseInternal(lease,
          path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
      break;
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
              logVersion);
      INodesInPath iip = fsDir.getINodesInPath4Write(snapshotRoot);
      String path = fsNamesys.getSnapshotManager().createSnapshot(iip,
          snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
      }
      break;
    }
    case OP_DELETE_SNAPSHOT: {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ChunkedArrayList<INode>();
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
              logVersion);
      INodesInPath iip = fsDir.getINodesInPath4Write(snapshotRoot);
      fsNamesys.getSnapshotManager().deleteSnapshot(
          iip, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
      collectedBlocks.clear();
      fsNamesys.dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
            deleteSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
              logVersion);
      INodesInPath iip = fsDir.getINodesInPath4Write(snapshotRoot);
      fsNamesys.getSnapshotManager().renameSnapshot(iip,
          snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
            renameSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
      fsNamesys.getSnapshotManager().setSnapshottable(
          snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().resetSnapshottable(
          snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {
      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
      fsNamesys.getBlockIdManager().setGenerationStampV2(
          setGenstampV2Op.genStampV2);
      break;
    }
    case OP_ALLOCATE_BLOCK_ID: {
      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
      fsNamesys.getBlockIdManager().setLastAllocatedBlockId(
          allocateBlockIdOp.blockId);
      break;
    }
    case OP_ROLLING_UPGRADE_START: {
      if (startOpt == StartupOption.ROLLINGUPGRADE) {
        final RollingUpgradeStartupOption rollingUpgradeOpt
            = startOpt.getRollingUpgradeStartupOption();
        if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
          throw new RollingUpgradeOp.RollbackException();
        } else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
          // ignore the upgrade marker
          break;
        }
      }
      // start rolling upgrade
      final long startTime = ((RollingUpgradeOp) op).getTime();
      fsNamesys.startRollingUpgradeInternal(startTime);
      fsNamesys.triggerRollbackCheckpoint();
      break;
    }
    case OP_ROLLING_UPGRADE_FINALIZE: {
      final long finalizeTime = ((RollingUpgradeOp) op).getTime();
      if (fsNamesys.isRollingUpgrade()) {
        // Only do this when the NN is actually doing a rolling upgrade.
        // We can get FINALIZE without a corresponding START if the NN is
        // restarted before this op is consumed and a new checkpoint is
        // created.
        fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
      }
      fsNamesys.getFSImage().updateStorageVersion();
      fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
      break;
    }
    case OP_ADD_CACHE_DIRECTIVE: {
      AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op;
      CacheDirectiveInfo result = fsNamesys.
          getCacheManager().addDirectiveFromEditLog(addOp.directive);
      if (toAddRetryCache) {
        Long id = result.getId();
        fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id);
      }
      break;
    }
    case OP_MODIFY_CACHE_DIRECTIVE: {
      ModifyCacheDirectiveInfoOp modifyOp =
          (ModifyCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().modifyDirectiveFromEditLog(
          modifyOp.directive);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_DIRECTIVE: {
      RemoveCacheDirectiveInfoOp removeOp =
          (RemoveCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().removeDirective(removeOp.id, null);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_ADD_CACHE_POOL: {
      AddCachePoolOp addOp = (AddCachePoolOp) op;
      fsNamesys.getCacheManager().addCachePool(addOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_MODIFY_CACHE_POOL: {
      ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op;
      fsNamesys.getCacheManager().modifyCachePool(modifyOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_POOL: {
      RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op;
      fsNamesys.getCacheManager().removeCachePool(removeOp.poolName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_SET_ACL: {
      SetAclOp setAclOp = (SetAclOp) op;
      FSDirAclOp.unprotectedSetAcl(fsDir, setAclOp.src, setAclOp.aclEntries,
          true);
      break;
    }
    case OP_SET_XATTR: {
      SetXAttrOp setXAttrOp = (SetXAttrOp) op;
      FSDirXAttrOp.unprotectedSetXAttrs(fsDir, setXAttrOp.src,
                                        setXAttrOp.xAttrs,
                                        EnumSet.of(XAttrSetFlag.CREATE,
                                                   XAttrSetFlag.REPLACE));
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(setXAttrOp.rpcClientId, setXAttrOp.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_XATTR: {
      RemoveXAttrOp removeXAttrOp = (RemoveXAttrOp) op;
      FSDirXAttrOp.unprotectedRemoveXAttrs(fsDir, removeXAttrOp.src,
                                           removeXAttrOp.xAttrs);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(removeXAttrOp.rpcClientId,
            removeXAttrOp.rpcCallId);
      }
      break;
    }
    case OP_TRUNCATE: {
      TruncateOp truncateOp = (TruncateOp) op;
      fsDir.unprotectedTruncate(truncateOp.src, truncateOp.clientName,
          truncateOp.clientMachine, truncateOp.newLength, truncateOp.timestamp,
          truncateOp.truncateBlock);
      break;
    }
    case OP_SET_STORAGE_POLICY: {
      SetStoragePolicyOp setStoragePolicyOp = (SetStoragePolicyOp) op;
      final String path = renameReservedPathsOnUpgrade(setStoragePolicyOp.path,
          logVersion);
      final INodesInPath iip = fsDir.getINodesInPath4Write(path);
      FSDirAttrOp.unprotectedSetStoragePolicy(
          fsDir, fsNamesys.getBlockManager(), iip,
          setStoragePolicyOp.policyId);
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
    return inodeId;
  }

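  /**
   * Build an error message describing where edit log replay failed,
   * including the stream offset, the expected transaction ID, and the
   * offsets of recently read opcodes.
   */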
  private static String formatEditLogReplayError(EditLogInputStream in,
      long recentOpcodeOffsets[], long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset ").append(in.getPosition());
    sb.append(".  Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

  /**
   * Add a new block to the given INodeFile.
   */
  private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file)
      throws IOException {
    BlockInfoContiguous[] oldBlocks = file.getBlocks();
    Block pBlock = op.getPenultimateBlock();
    Block newBlock = op.getLastBlock();

    if (pBlock != null) { // the penultimate block is not null
      Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0);
      // compare pBlock with the last block of oldBlocks
      Block oldLastBlock = oldBlocks[oldBlocks.length - 1];
      if (oldLastBlock.getBlockId() != pBlock.getBlockId()
          || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) {
        throw new IOException(
            "Mismatched block IDs or generation stamps for the old last block of file "
                + op.getPath() + ", the old last block is " + oldLastBlock
                + ", and the block read from editlog is " + pBlock);
      }

      oldLastBlock.setNumBytes(pBlock.getNumBytes());
      if (oldLastBlock instanceof BlockInfoContiguousUnderConstruction) {
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoContiguousUnderConstruction) oldLastBlock);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock);
      }
    } else { // the penultimate block is null
      Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0);
    }
    // add the new block
    BlockInfoContiguous newBI = new BlockInfoContiguousUnderConstruction(
          newBlock, file.getBlockReplication());
    fsNamesys.getBlockManager().addBlockCollection(newBI, file);
    file.addBlock(newBI);
    fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException if the block information in the edit log does not
   *         match the file's current block list
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodesInPath iip, INodeFile file) throws IOException {
    // Update its block list
    BlockInfoContiguous[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();

    // Are we only updating the last block's gen stamp?
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfoContiguous oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
        oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoContiguousUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoContiguousUnderConstruction) oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path, iip, file, oldBlock);
      if (!removed && !(op instanceof UpdateBlocksOp)) {
        throw new IOException("Trying to delete non-existent block "
            + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfoContiguous newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoContiguousUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfoContiguous(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

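  /** Log a debug-level summary of how many times each opcode was loaded. */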
  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n  ");
    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

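  /**
   * Increment the per-opcode count and the startup progress counter for the
   * current step.
   */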
  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
      Counter counter) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
    counter.increment();
  }

  /**
   * Throw an appropriate exception during upgrade from 203, when editlog
   * loading could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 release has conflicting opcodes with later releases.
    // The editlog must be emptied by restarting the namenode, before
    // proceeding with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old"
          + " release and restart the namenode. This empties the editlog"
          + " and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it.
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

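  /**
   * Scan an edit log to determine its last valid position and the highest
   * transaction ID it contains, resyncing past any unreadable sections.
   */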
  static EditLogValidation scanEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) { // TODO
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

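  /**
   * Result of validating or scanning an edit log: the length of the valid
   * prefix, the last transaction ID seen, and whether the log's header was
   * corrupt.
   */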
  static class EditLogValidation {
    private final long validLength;
    private final long endTxId;
    private final boolean hasCorruptHeader;

    EditLogValidation(long validLength, long endTxId,
        boolean hasCorruptHeader) {
      this.validLength = validLength;
      this.endTxId = endTxId;
      this.hasCorruptHeader = hasCorruptHeader;
    }

    long getValidLength() { return validLength; }

    long getEndTxId() { return endTxId; }

    boolean hasCorruptHeader() { return hasCorruptHeader; }
  }

  /**
   * Stream wrapper that keeps track of the current stream position.
   *
   * This stream also allows us to set a limit on how many bytes we can read
   * without getting an exception.
   */
  public static class PositionTrackingInputStream extends FilterInputStream
      implements StreamLimiter {
    private long curPos = 0;     // current offset into the underlying stream
    private long markPos = -1;   // position saved by mark(), or -1 if unset
    private long limitPos = Long.MAX_VALUE;  // reads past this offset fail

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

    private void checkLimit(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to read " + amt + " byte(s) past " +
            "the limit at offset " + limitPos);
      }
    }

    @Override
    public int read() throws IOException {
      checkLimit(1);
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    @Override
    public int read(byte[] data) throws IOException {
      checkLimit(data.length);
      int ret = super.read(data);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public int read(byte[] data, int offset, int length) throws IOException {
      checkLimit(length);
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public void setLimit(long limit) {
      limitPos = curPos + limit;
    }

    @Override
    public void clearLimit() {
      limitPos = Long.MAX_VALUE;
    }

    @Override
    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    @Override
    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }

    @Override
    public long skip(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to skip " + extra + " bytes past " +
            "the limit at offset " + limitPos);
      }
      long ret = super.skip(amt);
      curPos += ret;
      return ret;
    }
  }

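  /** @return the transaction ID of the last edit applied by this loader. */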
  public long getLastAppliedTxId() {
    return lastAppliedTxId;
  }

  /**
   * Creates a Step used for updating startup progress, populated with
   * information from the given edits.  The step always includes the log's name.
   * If the log has a known length, then the length is included in the step too.
   *
   * @param edits EditLogInputStream to use for populating step
   * @return Step populated with information from edits
   * @throws IOException thrown if there is an I/O error
   */
  private static Step createStartupProgressStep(EditLogInputStream edits)
      throws IOException {
    long length = edits.length();
    String name = edits.getCurrentStreamName();
    return length != -1 ? new Step(name, length) : new Step(name);
  }
}