/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
import static org.apache.hadoop.util.Time.now;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.util.ChunkedArrayList;
import org.apache.hadoop.hdfs.util.Holder;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;

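/**
 * Loads a sequence of edit log operations from an {@link EditLogInputStream}
 * and applies each one to the in-memory namespace of an {@link FSNamesystem}.
 *
 * <p>Illustrative usage sketch (not taken from callers in the original
 * source; assumes a namesystem and an open edits stream are available):
 * <pre>
 *   FSEditLogLoader loader = new FSEditLogLoader(fsNamesys, lastAppliedTxId);
 *   long numEdits = loader.loadFSEdits(edits, lastAppliedTxId + 1);
 *   long newLastTxId = loader.getLastAppliedTxId();
 * </pre>
 */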
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec

  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;
  /** Total number of edit log transactions loaded. */
  private int totalEdits = 0;

  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
      throws IOException {
    return loadFSEdits(edits, expectedStartingTxId, null, null);
  }

  /**
   * Load an edit log and apply the changes to the in-memory structure.
   * This is where we apply edits that we have been writing to disk all
   * along.
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(edits);
    prog.beginStep(Phase.LOADING_EDITS, step);
    fsNamesys.writeLock();
    try {
      long startTime = now();
      FSImage.LOG.info("Start loading edits file " + edits.getName());
      long numEdits = loadEditRecords(edits, false, expectedStartingTxId,
          startOpt, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (now() - startTime) / 1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock();
      prog.endStep(Phase.LOADING_EDITS, step);
    }
  }

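  /**
   * Replay edit records from the given stream, applying each operation to
   * the in-memory namespace. In recovery mode, bad sections of the log may
   * be skipped after prompting; otherwise the first failure aborts loading.
   *
   * @return the number of edits applied from this stream
   */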
  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
      long expectedStartingTxId, StartupOption startOpt,
      MetaRecoveryContext recovery) throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
      new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

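    // Ring buffer of stream offsets of the most recently read opcodes, used
    // by formatEditLogReplayError to report where a replay failure occurred.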
    long[] recentOpcodeOffsets = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = now();
    long lastInodeId = fsNamesys.getLastInodeId();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(true), e);
            String errorMessage =
              formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] =
            in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be a gap in the edit log.  We expected txid " +
                  expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "ignoring missing transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be an out-of-order edit in the edit log.  We " +
                  "expected txid " + expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "skipping the out-of-order edit");
              continue;
            }
          }
          try {
            if (LOG.isTraceEnabled()) {
              LOG.trace("op=" + op + ", startOpt=" + startOpt
                  + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
            }
            long inodeId = applyEditLogOp(op, fsDir, startOpt,
                in.getVersion(true), lastInodeId);
            if (lastInodeId < inodeId) {
              lastInodeId = inodeId;
            }
          } catch (RollingUpgradeOp.RollbackException e) {
            throw e;
          } catch (Throwable e) {
            LOG.error("Encountered exception on operation " + op, e);
            if (recovery == null) {
              throw e instanceof IOException ? (IOException) e : new IOException(e);
            }

            MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
                "apply edit log operation " + op + ": error " +
                e.getMessage(), recovery, "applying edits");
          }
          // Now that the operation has been successfully decoded and
          // applied, update our bookkeeping.
          incrOpCount(op.opCode, opCounts, step, counter);
          if (op.hasTransactionId()) {
            lastAppliedTxId = op.getTransactionId();
            expectedTxId = lastAppliedTxId + 1;
          } else {
            expectedTxId = lastAppliedTxId = expectedStartingTxId;
          }
          // log progress
          if (op.hasTransactionId()) {
            long now = now();
            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
              int percent = Math.round((float) deltaTxId / numTxns * 100);
              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                  + " transactions completed. (" + percent + "%)");
              lastLogTime = now;
            }
          }
          numEdits++;
          totalEdits++;
        } catch (RollingUpgradeOp.RollbackException e) {
          LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
          break;
        } catch (MetaRecoveryContext.RequestStopException e) {
          MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
              in.getPosition() + "/" + in.length());
          break;
        }
      }
    } finally {
      fsNamesys.resetLastInodeId(lastInodeId);
      if (closeOnExit) {
        in.close();
      }
      fsDir.writeUnlock();
      fsNamesys.writeUnlock();

      if (LOG.isTraceEnabled()) {
        LOG.trace("replaying edit log finished");
      }

      if (FSImage.LOG.isDebugEnabled()) {
        dumpOpCounts(opCounts);
      }
    }
    return numEdits;
  }

  /**
   * Determine the inode ID for an op, allocating a new ID when the layout
   * predates per-inode IDs, and update the last allocated inode ID.
   */
  private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
      long lastInodeId) throws IOException {
    long inodeId = inodeIdFromOp;

    if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.ADD_INODE_ID, logVersion)) {
        throw new IOException("The layout version " + logVersion
            + " supports inodeId but gave bogus inodeId");
      }
      inodeId = fsNamesys.allocateNewInodeId();
    } else {
      // Need to reset lastInodeId: FSNamesystem initially gets lastInodeId
      // from the fsimage, but the edit log may record more recent inode ID
      // allocations.
      if (inodeId > lastInodeId) {
        fsNamesys.resetLastInodeId(inodeId);
      }
    }
    return inodeId;
  }

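  /**
   * Apply a single edit log operation to the in-memory namespace.
   *
   * @return the inode ID of any inode created by the operation, or
   *         {@code INodeId.GRANDFATHER_INODE_ID} if no inode was created
   */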
  @SuppressWarnings("deprecation")
  private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
      StartupOption startOpt, int logVersion, long lastInodeId) throws IOException {
    long inodeId = INodeId.GRANDFATHER_INODE_ID;
    if (LOG.isTraceEnabled()) {
      LOG.trace("replaying edit log: " + op);
    }
    final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();

    switch (op.opCode) {
    case OP_ADD: {
      AddCloseOp addCloseOp = (AddCloseOp)op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }
      // There are three cases here:
      // 1. OP_ADD to create a new file
      // 2. OP_ADD to update file blocks
      // 3. OP_ADD to open file for append

      // See if the file already exists (persistBlocks call)
      final INodesInPath iip = fsDir.getLastINodeInPath(path);
      final INodeFile oldFile = INodeFile.valueOf(
          iip.getINode(0), path, true);
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager()
            .adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
            lastInodeId);
        newFile = fsDir.unprotectedAddFile(inodeId,
            path, addCloseOp.permissions, addCloseOp.aclEntries,
            addCloseOp.xAttrs,
            replication, addCloseOp.mtime, addCloseOp.atime,
            addCloseOp.blockSize, true, addCloseOp.clientName,
            addCloseOp.clientMachine);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);

        // add the op into retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
              HdfsFileStatus.EMPTY_NAME, newFile, Snapshot.CURRENT_STATE_ID);
          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
              addCloseOp.rpcCallId, stat);
        }
      } else { // This is OP_ADD on an existing file
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file " +
                "for append");
          }
          LocatedBlock lb = fsNamesys.prepareFileForWrite(path, oldFile,
              addCloseOp.clientName, addCloseOp.clientMachine, false,
              iip.getLatestSnapshotId(), false);
          newFile = INodeFile.valueOf(fsDir.getINode(path),
              path, true);

          // add the op into retry cache if necessary
          if (toAddRetryCache) {
            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
                addCloseOp.rpcCallId, lb);
          }
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp)op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }

      final INodesInPath iip = fsDir.getLastINodeInPath(path);
      final INodeFile file = INodeFile.valueOf(iip.getINode(0), path);

      // Update the salient file attributes.
      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, file);

      // Now close the file
      if (!file.isUnderConstruction() &&
          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (file.isUnderConstruction()) {
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
        file.toCompleteFile(file.getModificationTime());
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp)op;
      final String path =
          renameReservedPathsOnUpgrade(updateOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + updateOp.blocks.length);
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path),
          path);
      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, oldFile);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
      }
      break;
    }
    case OP_ADD_BLOCK: {
      AddBlockOp addBlockOp = (AddBlockOp) op;
      String path =
          renameReservedPathsOnUpgrade(addBlockOp.getPath(), logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " new block id : " + addBlockOp.getLastBlock().getBlockId());
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path);
      // add the new block to the INodeFile
      addNewBlock(fsDir, addBlockOp, oldFile);
      break;
    }
    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp)op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      fsDir.unprotectedSetReplication(
          renameReservedPathsOnUpgrade(setReplicationOp.path, logVersion),
          replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
      String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
      String[] srcs = new String[concatDeleteOp.srcs.length];
      for (int i = 0; i < srcs.length; i++) {
        srcs[i] =
            renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
      }
      fsDir.unprotectedConcat(trg, srcs, concatDeleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
            concatDeleteOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp)op;
      final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
      final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
      fsDir.unprotectedRenameTo(src, dst, renameOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp)op;
      fsDir.unprotectedDelete(
          renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
          deleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
      }
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp)op;
      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedMkdir(inodeId,
          renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
          mkdirOp.permissions, mkdirOp.aclEntries, mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP_V1: {
      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op)op;
      fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
      fsDir.unprotectedSetPermission(
          renameReservedPathsOnUpgrade(setPermissionsOp.src, logVersion),
          setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp)op;
      fsDir.unprotectedSetOwner(
          renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
          setOwnerOp.username, setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
          setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
          HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_SET_QUOTA: {
      SetQuotaOp setQuotaOp = (SetQuotaOp)op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
          setQuotaOp.nsQuota, setQuotaOp.dsQuota);
      break;
    }
    case OP_TIMES: {
      TimesOp timesOp = (TimesOp)op;
      fsDir.unprotectedSetTimes(
          renameReservedPathsOnUpgrade(timesOp.path, logVersion),
          timesOp.mtime, timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      if (!FileSystem.areSymlinksEnabled()) {
        throw new IOException("Symlinks not supported - please remove "
            + "symlink before upgrading to this version of HDFS");
      }
      SymlinkOp symlinkOp = (SymlinkOp)op;
      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedAddSymlink(inodeId,
          renameReservedPathsOnUpgrade(symlinkOp.path, logVersion),
          symlinkOp.value, symlinkOp.mtime, symlinkOp.atime,
          symlinkOp.permissionStatus);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp)op;
      fsDir.unprotectedRenameTo(
          renameReservedPathsOnUpgrade(renameOp.src, logVersion),
          renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
          renameOp.timestamp, renameOp.options);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_GET_DELEGATION_TOKEN: {
      GetDelegationTokenOp getDelegationTokenOp
        = (GetDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .addPersistedDelegationToken(getDelegationTokenOp.token,
                                     getDelegationTokenOp.expiryTime);
      break;
    }
    case OP_RENEW_DELEGATION_TOKEN: {
      RenewDelegationTokenOp renewDelegationTokenOp
        = (RenewDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
                                     renewDelegationTokenOp.expiryTime);
      break;
    }
    case OP_CANCEL_DELEGATION_TOKEN: {
      CancelDelegationTokenOp cancelDelegationTokenOp
        = (CancelDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenCancellation(
              cancelDelegationTokenOp.token);
      break;
    }
    case OP_UPDATE_MASTER_KEY: {
      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .updatePersistedMasterKey(updateMasterKeyOp.key);
      break;
    }
    case OP_REASSIGN_LEASE: {
      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;
      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
      final String path =
          renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
      INodeFile pendingFile = fsDir.getINode(path).asFile();
      Preconditions.checkState(pendingFile.isUnderConstruction());
      fsNamesys.reassignLeaseInternal(lease,
          path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
      break;
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
              logVersion);
      String path = fsNamesys.getSnapshotManager().createSnapshot(
          snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
      }
      break;
    }
    case OP_DELETE_SNAPSHOT: {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ChunkedArrayList<INode>();
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().deleteSnapshot(
          snapshotRoot, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
      collectedBlocks.clear();
      fsNamesys.dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
            deleteSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().renameSnapshot(
          snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
            renameSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
      fsNamesys.getSnapshotManager().setSnapshottable(
          snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().resetSnapshottable(
          snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {
      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
      fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
      break;
    }
    case OP_ALLOCATE_BLOCK_ID: {
      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
      fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
      break;
    }
    case OP_ROLLING_UPGRADE_START: {
      if (startOpt == StartupOption.ROLLINGUPGRADE) {
        final RollingUpgradeStartupOption rollingUpgradeOpt
            = startOpt.getRollingUpgradeStartupOption();
        if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
          throw new RollingUpgradeOp.RollbackException();
        } else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
          // ignore upgrade marker
          break;
        }
      }
      // start rolling upgrade
      final long startTime = ((RollingUpgradeOp) op).getTime();
      fsNamesys.startRollingUpgradeInternal(startTime);
      fsNamesys.triggerRollbackCheckpoint();
      break;
    }
    case OP_ROLLING_UPGRADE_FINALIZE: {
      final long finalizeTime = ((RollingUpgradeOp) op).getTime();
      if (fsNamesys.isRollingUpgrade()) {
        // Only do it when NN is actually doing rolling upgrade.
        // We can get FINALIZE without corresponding START, if NN is restarted
        // before this op is consumed and a new checkpoint is created.
        fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
      }
      fsNamesys.getFSImage().updateStorageVersion();
      fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
      break;
    }
    case OP_ADD_CACHE_DIRECTIVE: {
      AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op;
      CacheDirectiveInfo result = fsNamesys.getCacheManager()
          .addDirectiveFromEditLog(addOp.directive);
      if (toAddRetryCache) {
        Long id = result.getId();
        fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id);
      }
      break;
    }
    case OP_MODIFY_CACHE_DIRECTIVE: {
      ModifyCacheDirectiveInfoOp modifyOp =
          (ModifyCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().modifyDirectiveFromEditLog(
          modifyOp.directive);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_DIRECTIVE: {
      RemoveCacheDirectiveInfoOp removeOp =
          (RemoveCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().removeDirective(removeOp.id, null);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_ADD_CACHE_POOL: {
      AddCachePoolOp addOp = (AddCachePoolOp) op;
      fsNamesys.getCacheManager().addCachePool(addOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_MODIFY_CACHE_POOL: {
      ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op;
      fsNamesys.getCacheManager().modifyCachePool(modifyOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_POOL: {
      RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op;
      fsNamesys.getCacheManager().removeCachePool(removeOp.poolName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_SET_ACL: {
      SetAclOp setAclOp = (SetAclOp) op;
      fsDir.unprotectedSetAcl(setAclOp.src, setAclOp.aclEntries);
      break;
    }
    case OP_SET_XATTR: {
      SetXAttrOp setXAttrOp = (SetXAttrOp) op;
      fsDir.unprotectedSetXAttrs(setXAttrOp.src, setXAttrOp.xAttrs,
          EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(setXAttrOp.rpcClientId, setXAttrOp.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_XATTR: {
      RemoveXAttrOp removeXAttrOp = (RemoveXAttrOp) op;
      fsDir.unprotectedRemoveXAttrs(removeXAttrOp.src,
          removeXAttrOp.xAttrs);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(removeXAttrOp.rpcClientId,
            removeXAttrOp.rpcCallId);
      }
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
    return inodeId;
  }

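  /**
   * Build a diagnostic message for a replay failure: the current stream
   * offset, the expected transaction ID, and the offsets of the most
   * recently read opcodes.
   */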
  private static String formatEditLogReplayError(EditLogInputStream in,
      long[] recentOpcodeOffsets, long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset ").append(in.getPosition());
    sb.append(".  Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

  /**
   * Add a new block into the given INodeFile.
   */
  private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file)
      throws IOException {
    BlockInfo[] oldBlocks = file.getBlocks();
    Block pBlock = op.getPenultimateBlock();
    Block newBlock = op.getLastBlock();

    if (pBlock != null) { // the penultimate block is not null
      Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0);
      // compare pBlock with the last block of oldBlocks
      Block oldLastBlock = oldBlocks[oldBlocks.length - 1];
      if (oldLastBlock.getBlockId() != pBlock.getBlockId()
          || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) {
        throw new IOException(
            "Mismatched block IDs or generation stamps for the old last block of file "
                + op.getPath() + ", the old last block is " + oldLastBlock
                + ", and the block read from editlog is " + pBlock);
      }

      oldLastBlock.setNumBytes(pBlock.getNumBytes());
      if (oldLastBlock instanceof BlockInfoUnderConstruction) {
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldLastBlock);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock);
      }
    } else { // the penultimate block is null
      Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0);
    }
    // add the new block
    BlockInfo newBI = new BlockInfoUnderConstruction(
        newBlock, file.getBlockReplication());
    fsNamesys.getBlockManager().addBlockCollection(newBI, file);
    file.addBlock(newBI);
    fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();

    // Are we only updating the last block's gen stamp?
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
        oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path, file, oldBlock);
      if (!removed && !(op instanceof UpdateBlocksOp)) {
        throw new IOException("Trying to delete non-existent block " + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

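  /** Log a per-opcode summary of the operations loaded from the edit log. */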
  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n  ");
    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

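  /**
   * Increment the in-memory count for the given opcode and advance the
   * startup progress counter for the current step.
   */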
  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
      Counter counter) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
    counter.increment();
  }

  /**
   * Throw an appropriate exception during upgrade from 203, when editlog
   * loading could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 release has conflicting opcodes with later releases.
    // The editlog must be emptied by restarting the namenode, before
    // proceeding with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old"
          + " release and restart the namenode. This empties the editlog"
          + " and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it.
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

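  /**
   * Scan the stream to find its last valid transaction ID, analogous to
   * {@link #validateEditLog(EditLogInputStream)}. This reads through the
   * stream but does not close it.
   */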
  static EditLogValidation scanEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) { // TODO
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

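  /**
   * Result of validating or scanning an edit log stream: the length of the
   * valid region, the highest transaction ID seen, and whether the stream
   * header was corrupt.
   */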
  static class EditLogValidation {
    private final long validLength;
    private final long endTxId;
    private final boolean hasCorruptHeader;

    EditLogValidation(long validLength, long endTxId,
        boolean hasCorruptHeader) {
      this.validLength = validLength;
      this.endTxId = endTxId;
      this.hasCorruptHeader = hasCorruptHeader;
    }

    long getValidLength() { return validLength; }

    long getEndTxId() { return endTxId; }

    boolean hasCorruptHeader() { return hasCorruptHeader; }
  }

  /**
   * Stream wrapper that keeps track of the current stream position.
   *
   * This stream also allows us to set a limit on how many bytes we can read
   * without getting an exception.
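   *
   * <p>Illustrative usage sketch (not from the original source; the stream
   * and limit values are hypothetical):
   * <pre>
   *   PositionTrackingInputStream in =
   *       new PositionTrackingInputStream(rawStream);
   *   in.setLimit(opLength); // reads past opLength more bytes now throw
   *   int b = in.read();     // getPos() advances as bytes are consumed
   * </pre>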
   */
  public static class PositionTrackingInputStream extends FilterInputStream
      implements StreamLimiter {
    private long curPos = 0;
    private long markPos = -1;
    private long limitPos = Long.MAX_VALUE;

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

    private void checkLimit(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to read " + amt + " byte(s) past " +
            "the limit at offset " + limitPos);
      }
    }

    @Override
    public int read() throws IOException {
      checkLimit(1);
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    @Override
    public int read(byte[] data) throws IOException {
      checkLimit(data.length);
      int ret = super.read(data);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public int read(byte[] data, int offset, int length) throws IOException {
      checkLimit(length);
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public void setLimit(long limit) {
      limitPos = curPos + limit;
    }

    @Override
    public void clearLimit() {
      limitPos = Long.MAX_VALUE;
    }

    @Override
    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    @Override
    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }

    @Override
    public long skip(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to skip " + extra + " bytes past " +
            "the limit at offset " + limitPos);
      }
      long ret = super.skip(amt);
      curPos += ret;
      return ret;
    }
  }

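  /** @return the transaction ID of the last edit applied by this loader. */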
  public long getLastAppliedTxId() {
    return lastAppliedTxId;
  }

  /**
   * Creates a Step used for updating startup progress, populated with
   * information from the given edits.  The step always includes the log's name.
   * If the log has a known length, then the length is included in the step too.
   *
   * @param edits EditLogInputStream to use for populating step
   * @return Step populated with information from edits
   * @throws IOException thrown if there is an I/O error
   */
  private static Step createStartupProgressStep(EditLogInputStream edits)
      throws IOException {
    long length = edits.length();
    String name = edits.getCurrentStreamName();
    return length != -1 ? new Step(name, length) : new Step(name);
  }
}