/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
import static org.apache.hadoop.util.Time.now;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.util.ChunkedArrayList;
import org.apache.hadoop.hdfs.util.Holder;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;

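/**
 * Loads a stream of edit log operations and applies them to a live
 * FSNamesystem, tracking the last applied transaction ID as it goes.
 * Illustrative usage sketch (the real call sites live in the image-loading
 * code, e.g. {@link FSImage}; the variable names here are hypothetical):
 *
 * <pre>{@code
 * FSEditLogLoader loader = new FSEditLogLoader(fsNamesys, lastAppliedTxId);
 * long numLoaded = loader.loadFSEdits(stream, lastAppliedTxId + 1);
 * }</pre>
 */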
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1 sec

  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;
  /** Total number of edit log operations loaded by this loader. */
  private int totalEdits = 0;

  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
      throws IOException {
    return loadFSEdits(edits, expectedStartingTxId, null, null);
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
   *
   * @return the number of edits loaded from the stream
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(edits);
    prog.beginStep(Phase.LOADING_EDITS, step);
    fsNamesys.writeLock();
    try {
      long startTime = now();
      FSImage.LOG.info("Start loading edits file " + edits.getName());
      long numEdits = loadEditRecords(edits, false, expectedStartingTxId,
          startOpt, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (now() - startTime) / 1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock();
      prog.endStep(Phase.LOADING_EDITS, step);
    }
  }

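  /**
   * Read ops from {@code in} and apply each one to the namespace, starting
   * at {@code expectedStartingTxId}. When a {@link MetaRecoveryContext} is
   * supplied, unreadable or failing ops may be skipped after prompting;
   * otherwise any failure aborts the load.
   *
   * @return the number of edits applied from this stream
   */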
  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
      long expectedStartingTxId, StartupOption startOpt,
      MetaRecoveryContext recovery) throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

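    // Small ring buffer of the stream positions of recently read ops, used
    // by formatEditLogReplayError to point at where a replay failure is.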
    long[] recentOpcodeOffsets = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = now();
    long lastInodeId = fsNamesys.getLastInodeId();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(true), e);
            String errorMessage =
                formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] =
              in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be a gap in the edit log.  We expected txid " +
                  expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "ignoring missing transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be an out-of-order edit in the edit log.  We " +
                  "expected txid " + expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "skipping the out-of-order edit");
              continue;
            }
          }
          try {
            if (LOG.isTraceEnabled()) {
              LOG.trace("op=" + op + ", startOpt=" + startOpt
                  + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
            }
            long inodeId = applyEditLogOp(op, fsDir, startOpt,
                in.getVersion(true), lastInodeId);
            if (lastInodeId < inodeId) {
              lastInodeId = inodeId;
            }
          } catch (RollingUpgradeOp.RollbackException e) {
            throw e;
          } catch (Throwable e) {
            LOG.error("Encountered exception on operation " + op, e);
            if (recovery == null) {
              throw e instanceof IOException ? (IOException) e : new IOException(e);
            }

            MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
                "apply edit log operation " + op + ": error " +
                e.getMessage(), recovery, "applying edits");
          }
          // Now that the operation has been successfully decoded and
          // applied, update our bookkeeping.
          incrOpCount(op.opCode, opCounts, step, counter);
          if (op.hasTransactionId()) {
            lastAppliedTxId = op.getTransactionId();
            expectedTxId = lastAppliedTxId + 1;
          } else {
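            // Ops without transaction IDs come from layouts that predate
            // stored txids; keep the expected txid pinned at the start.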
            expectedTxId = lastAppliedTxId = expectedStartingTxId;
          }
          // log progress
          if (op.hasTransactionId()) {
            long now = now();
            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
              int percent = Math.round((float) deltaTxId / numTxns * 100);
              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                  + " transactions completed. (" + percent + "%)");
              lastLogTime = now;
            }
          }
          numEdits++;
          totalEdits++;
        } catch (RollingUpgradeOp.RollbackException e) {
          LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
          break;
        } catch (MetaRecoveryContext.RequestStopException e) {
          MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
              in.getPosition() + "/" + in.length());
          break;
        }
      }
    } finally {
      fsNamesys.resetLastInodeId(lastInodeId);
      if (closeOnExit) {
        in.close();
      }
      fsDir.writeUnlock();
      fsNamesys.writeUnlock();

      if (LOG.isTraceEnabled()) {
        LOG.trace("replaying edit log finished");
      }

      if (FSImage.LOG.isDebugEnabled()) {
        dumpOpCounts(opCounts);
      }
    }
    return numEdits;
  }

  // Resolve the inode ID for an op: allocate a fresh ID for layouts that
  // predate inode IDs, otherwise trust the logged ID and advance the
  // namesystem's last allocated inode ID when needed.
  private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
      long lastInodeId) throws IOException {
    long inodeId = inodeIdFromOp;

    if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.ADD_INODE_ID, logVersion)) {
        throw new IOException("The layout version " + logVersion
            + " supports inodeId but gave bogus inodeId");
      }
      inodeId = fsNamesys.allocateNewInodeId();
    } else {
      // May need to reset lastInodeId: fsnamesys seeds lastInodeId from the
      // fsimage, but the edit log captures more recent inodeId allocations.
      if (inodeId > lastInodeId) {
        fsNamesys.resetLastInodeId(inodeId);
      }
    }
    return inodeId;
  }

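  /**
   * Apply a single edit log operation to the namespace.
   *
   * @return the inode ID used by ops that create inodes (OP_ADD, OP_MKDIR,
   *         OP_SYMLINK), or {@link INodeId#GRANDFATHER_INODE_ID} otherwise
   */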
  @SuppressWarnings("deprecation")
  private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
      StartupOption startOpt, int logVersion, long lastInodeId) throws IOException {
    long inodeId = INodeId.GRANDFATHER_INODE_ID;
    if (LOG.isTraceEnabled()) {
      LOG.trace("replaying edit log: " + op);
    }
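    // If the retry cache is enabled and this op carries RPC client/call IDs,
    // re-populate the cache during replay so that a retried client call is
    // answered from the cache rather than re-executed.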
    final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();

    switch (op.opCode) {
    case OP_ADD: {
      AddCloseOp addCloseOp = (AddCloseOp) op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }
      // There are three cases here:
      // 1. OP_ADD to create a new file
      // 2. OP_ADD to update file blocks
      // 3. OP_ADD to open file for append

      // See if the file already exists (persistBlocks call)
      final INodesInPath iip = fsDir.getLastINodeInPath(path);
      final INodeFile oldFile = INodeFile.valueOf(
          iip.getINode(0), path, true);
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager()
            .adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
            lastInodeId);
        newFile = fsDir.unprotectedAddFile(inodeId,
            path, addCloseOp.permissions, addCloseOp.aclEntries,
            replication, addCloseOp.mtime, addCloseOp.atime,
            addCloseOp.blockSize, true, addCloseOp.clientName,
            addCloseOp.clientMachine);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);

        // add the op into retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
              HdfsFileStatus.EMPTY_NAME, newFile, Snapshot.CURRENT_STATE_ID);
          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
              addCloseOp.rpcCallId, stat);
        }
      } else { // This is OP_ADD on an existing file
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file " +
                "for append");
          }
          LocatedBlock lb = fsNamesys.prepareFileForWrite(path,
              oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null,
              false, iip.getLatestSnapshotId(), false);
          newFile = INodeFile.valueOf(fsDir.getINode(path),
              path, true);

          // add the op into the retry cache if necessary
          if (toAddRetryCache) {
            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
                addCloseOp.rpcCallId, lb);
          }
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp) op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }

      final INodesInPath iip = fsDir.getLastINodeInPath(path);
      final INodeFile file = INodeFile.valueOf(iip.getINode(0), path);

      // Update the salient file attributes.
      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, file);

      // Now close the file
      if (!file.isUnderConstruction() &&
          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (file.isUnderConstruction()) {
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
        file.toCompleteFile(file.getModificationTime());
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp) op;
      final String path =
          renameReservedPathsOnUpgrade(updateOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + updateOp.blocks.length);
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path),
          path);
      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, oldFile);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
      }
      break;
    }
    case OP_ADD_BLOCK: {
      AddBlockOp addBlockOp = (AddBlockOp) op;
      String path = renameReservedPathsOnUpgrade(addBlockOp.getPath(), logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " new block id : " + addBlockOp.getLastBlock().getBlockId());
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path);
      // add the new block to the INodeFile
      addNewBlock(fsDir, addBlockOp, oldFile);
      break;
    }
    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp) op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      fsDir.unprotectedSetReplication(
          renameReservedPathsOnUpgrade(setReplicationOp.path, logVersion),
          replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp) op;
      String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
      String[] srcs = new String[concatDeleteOp.srcs.length];
      for (int i = 0; i < srcs.length; i++) {
        srcs[i] =
            renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
      }
      fsDir.unprotectedConcat(trg, srcs, concatDeleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
            concatDeleteOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp) op;
      final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
      final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
      fsDir.unprotectedRenameTo(src, dst,
          renameOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp) op;
      fsDir.unprotectedDelete(
          renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
          deleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
      }
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp) op;
      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedMkdir(inodeId,
          renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
          mkdirOp.permissions, mkdirOp.aclEntries, mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP_V1: {
      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op) op;
      fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp) op;
      fsDir.unprotectedSetPermission(
          renameReservedPathsOnUpgrade(setPermissionsOp.src, logVersion),
          setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp) op;
      fsDir.unprotectedSetOwner(
          renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
          setOwnerOp.username, setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
          setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
          HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET);
      break;
    }

    case OP_SET_QUOTA:
      SetQuotaOp setQuotaOp = (SetQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
          setQuotaOp.nsQuota, setQuotaOp.dsQuota);
      break;

    case OP_TIMES: {
      TimesOp timesOp = (TimesOp) op;

      fsDir.unprotectedSetTimes(
          renameReservedPathsOnUpgrade(timesOp.path, logVersion),
          timesOp.mtime, timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      if (!FileSystem.areSymlinksEnabled()) {
        throw new IOException("Symlinks not supported - please remove"
            + " symlink before upgrading to this version of HDFS");
      }
      SymlinkOp symlinkOp = (SymlinkOp) op;
      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedAddSymlink(inodeId,
          renameReservedPathsOnUpgrade(symlinkOp.path, logVersion),
          symlinkOp.value, symlinkOp.mtime, symlinkOp.atime,
          symlinkOp.permissionStatus);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp) op;
      fsDir.unprotectedRenameTo(
          renameReservedPathsOnUpgrade(renameOp.src, logVersion),
          renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
          renameOp.timestamp, renameOp.options);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_GET_DELEGATION_TOKEN: {
      GetDelegationTokenOp getDelegationTokenOp
          = (GetDelegationTokenOp) op;

      fsNamesys.getDelegationTokenSecretManager()
          .addPersistedDelegationToken(getDelegationTokenOp.token,
              getDelegationTokenOp.expiryTime);
      break;
    }
    case OP_RENEW_DELEGATION_TOKEN: {
      RenewDelegationTokenOp renewDelegationTokenOp
          = (RenewDelegationTokenOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
              renewDelegationTokenOp.expiryTime);
      break;
    }
    case OP_CANCEL_DELEGATION_TOKEN: {
      CancelDelegationTokenOp cancelDelegationTokenOp
          = (CancelDelegationTokenOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenCancellation(
              cancelDelegationTokenOp.token);
      break;
    }
    case OP_UPDATE_MASTER_KEY: {
      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedMasterKey(updateMasterKeyOp.key);
      break;
    }
    case OP_REASSIGN_LEASE: {
      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp) op;

      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
      final String path =
          renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
      INodeFile pendingFile = fsDir.getINode(path).asFile();
      Preconditions.checkState(pendingFile.isUnderConstruction());
      fsNamesys.reassignLeaseInternal(lease,
          path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
      break;
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
              logVersion);
      String path = fsNamesys.getSnapshotManager().createSnapshot(
          snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
      }
      break;
    }
    case OP_DELETE_SNAPSHOT: {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ChunkedArrayList<INode>();
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().deleteSnapshot(
          snapshotRoot, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
      collectedBlocks.clear();
      fsNamesys.dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
            deleteSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().renameSnapshot(
          snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
            renameSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
      fsNamesys.getSnapshotManager().setSnapshottable(
          snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().resetSnapshottable(
          snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {
      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
      fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
      break;
    }
    case OP_ALLOCATE_BLOCK_ID: {
      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
      fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
      break;
    }
    case OP_ROLLING_UPGRADE_START: {
      if (startOpt == StartupOption.ROLLINGUPGRADE) {
        final RollingUpgradeStartupOption rollingUpgradeOpt
            = startOpt.getRollingUpgradeStartupOption();
        if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
          throw new RollingUpgradeOp.RollbackException();
        } else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
          // ignore upgrade marker
          break;
        }
      }
      // start rolling upgrade
      final long startTime = ((RollingUpgradeOp) op).getTime();
      fsNamesys.startRollingUpgradeInternal(startTime);
      fsNamesys.triggerRollbackCheckpoint();
      break;
    }
    case OP_ROLLING_UPGRADE_FINALIZE: {
      final long finalizeTime = ((RollingUpgradeOp) op).getTime();
      fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
      fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
      break;
    }
    case OP_ADD_CACHE_DIRECTIVE: {
      AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op;
      CacheDirectiveInfo result = fsNamesys.getCacheManager()
          .addDirectiveFromEditLog(addOp.directive);
      if (toAddRetryCache) {
        Long id = result.getId();
        fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id);
      }
      break;
    }
    case OP_MODIFY_CACHE_DIRECTIVE: {
      ModifyCacheDirectiveInfoOp modifyOp =
          (ModifyCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().modifyDirectiveFromEditLog(
          modifyOp.directive);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_DIRECTIVE: {
      RemoveCacheDirectiveInfoOp removeOp =
          (RemoveCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().removeDirective(removeOp.id, null);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_ADD_CACHE_POOL: {
      AddCachePoolOp addOp = (AddCachePoolOp) op;
      fsNamesys.getCacheManager().addCachePool(addOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_MODIFY_CACHE_POOL: {
      ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op;
      fsNamesys.getCacheManager().modifyCachePool(modifyOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_POOL: {
      RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op;
      fsNamesys.getCacheManager().removeCachePool(removeOp.poolName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_SET_ACL: {
      SetAclOp setAclOp = (SetAclOp) op;
      fsDir.unprotectedSetAcl(setAclOp.src, setAclOp.aclEntries);
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
    return inodeId;
  }

  private static String formatEditLogReplayError(EditLogInputStream in,
      long[] recentOpcodeOffsets, long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset ").append(in.getPosition());
    sb.append(".  Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

  /**
   * Add a new block into the given INodeFile.
   */
  private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file)
      throws IOException {
    BlockInfo[] oldBlocks = file.getBlocks();
    Block pBlock = op.getPenultimateBlock();
    Block newBlock = op.getLastBlock();

    if (pBlock != null) { // the penultimate block is not null
      Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0);
      // compare pBlock with the last block of oldBlocks
      Block oldLastBlock = oldBlocks[oldBlocks.length - 1];
      if (oldLastBlock.getBlockId() != pBlock.getBlockId()
          || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) {
        throw new IOException(
            "Mismatched block IDs or generation stamps for the old last block of file "
                + op.getPath() + ", the old last block is " + oldLastBlock
                + ", and the block read from editlog is " + pBlock);
      }

      oldLastBlock.setNumBytes(pBlock.getNumBytes());
      if (oldLastBlock instanceof BlockInfoUnderConstruction) {
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldLastBlock);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock);
      }
    } else { // the penultimate block is null
      Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0);
    }
    // add the new block
    BlockInfo newBI = new BlockInfoUnderConstruction(
        newBlock, file.getBlockReplication());
    fsNamesys.getBlockManager().addBlockCollection(newBI, file);
    file.addBlock(newBI);
    fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();
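
    // Three structural cases below: equal-length lists (at most a gen-stamp
    // or length update), a shorter new list (a block was abandoned), or a
    // longer new list (blocks were added or finalized).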
    // Are we only updating the last block's gen stamp?
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
          oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path, file, oldBlock);
      if (!removed && !(op instanceof UpdateBlocksOp)) {
        throw new IOException("Trying to delete non-existent block " + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n  ");
    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
      Counter counter) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
    counter.increment();
  }

  /**
   * Throw appropriate exception during upgrade from 203, when editlog loading
   * could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 release has conflicting opcodes with later releases.
    // The editlog must be emptied by restarting the namenode, before proceeding
    // with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old"
          + " release and restart the namenode. This empties the editlog"
          + " and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it.
   *
   * @throws IOException if the stream cannot be read due to an IO error (eg
   *                     if the log does not exist)
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

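  /**
   * Like {@link #validateEditLog(EditLogInputStream)}: read through the
   * stream without closing it, resyncing past unreadable sections, and
   * report the last transaction ID found.
   */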
  static EditLogValidation scanEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

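  /**
   * Result of validating or scanning an edit log stream: the length of the
   * readable prefix, the highest transaction ID seen, and whether the
   * stream's header was corrupt.
   */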
  static class EditLogValidation {
    private final long validLength;
    private final long endTxId;
    private final boolean hasCorruptHeader;

    EditLogValidation(long validLength, long endTxId,
        boolean hasCorruptHeader) {
      this.validLength = validLength;
      this.endTxId = endTxId;
      this.hasCorruptHeader = hasCorruptHeader;
    }

    long getValidLength() { return validLength; }

    long getEndTxId() { return endTxId; }

    boolean hasCorruptHeader() { return hasCorruptHeader; }
  }

  /**
   * Stream wrapper that keeps track of the current stream position.
   *
   * This stream also allows us to set a limit on how many bytes we can read
   * without getting an exception.
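   *
   * Illustrative sketch ({@code opLength} is a hypothetical byte count):
   *
   * <pre>{@code
   * PositionTrackingInputStream pin = new PositionTrackingInputStream(in);
   * pin.setLimit(opLength);  // reads beyond opLength bytes now throw
   * // ... decode one record from pin ...
   * pin.clearLimit();        // lift the cap before the next record
   * }</pre>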
   */
  public static class PositionTrackingInputStream extends FilterInputStream
      implements StreamLimiter {
    private long curPos = 0;
    private long markPos = -1;
    private long limitPos = Long.MAX_VALUE;

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

    private void checkLimit(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to read " + amt + " byte(s) past " +
            "the limit at offset " + limitPos);
      }
    }

    @Override
    public int read() throws IOException {
      checkLimit(1);
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    @Override
    public int read(byte[] data) throws IOException {
      checkLimit(data.length);
      int ret = super.read(data);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public int read(byte[] data, int offset, int length) throws IOException {
      checkLimit(length);
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public void setLimit(long limit) {
      limitPos = curPos + limit;
    }

    @Override
    public void clearLimit() {
      limitPos = Long.MAX_VALUE;
    }

    @Override
    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    @Override
    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }

    @Override
    public long skip(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to skip " + extra + " bytes past " +
            "the limit at offset " + limitPos);
      }
      long ret = super.skip(amt);
      curPos += ret;
      return ret;
    }
  }

  public long getLastAppliedTxId() {
    return lastAppliedTxId;
  }

  /**
   * Creates a Step used for updating startup progress, populated with
   * information from the given edits.  The step always includes the log's name.
   * If the log has a known length, then the length is included in the step too.
   *
   * @param edits EditLogInputStream to use for populating step
   * @return Step populated with information from edits
   * @throws IOException thrown if there is an I/O error
   */
  private static Step createStartupProgressStep(EditLogInputStream edits)
      throws IOException {
    long length = edits.length();
    String name = edits.getCurrentStreamName();
    return length != -1 ? new Step(name, length) : new Step(name);
  }
}