/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
import static org.apache.hadoop.util.Time.now;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetStoragePolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetXAttrOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.util.ChunkedArrayList;
import org.apache.hadoop.hdfs.util.Holder;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;

@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec

  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;
  /** Total number of edit log operations loaded. */
  private int totalEdits = 0;

  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
      throws IOException {
    return loadFSEdits(edits, expectedStartingTxId, null, null);
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(edits);
    prog.beginStep(Phase.LOADING_EDITS, step);
    fsNamesys.writeLock();
    try {
      long startTime = now();
      FSImage.LOG.info("Start loading edits file " + edits.getName());
      long numEdits = loadEditRecords(edits, false, expectedStartingTxId,
          startOpt, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (now() - startTime) / 1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock();
      prog.endStep(Phase.LOADING_EDITS, step);
    }
  }

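  /**
   * Read edit operations from the stream one at a time and apply each to
   * the namespace, tracking transaction IDs and startup progress. Returns
   * the number of edits applied. If a MetaRecoveryContext is supplied, the
   * loader may prompt to skip corrupt or out-of-order transactions rather
   * than aborting.
   */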
  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
      long expectedStartingTxId, StartupOption startOpt,
      MetaRecoveryContext recovery) throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

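    // Keep the offsets of the most recently read opcodes so that a replay
    // failure can report where in the stream the problem occurred.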
    long[] recentOpcodeOffsets = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = now();
    long lastInodeId = fsNamesys.getLastInodeId();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(true), e);
            String errorMessage =
                formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] =
              in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be a gap in the edit log.  We expected txid " +
                  expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "ignoring missing transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be an out-of-order edit in the edit log.  We " +
                  "expected txid " + expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "skipping the out-of-order edit");
              continue;
            }
          }
          try {
            if (LOG.isTraceEnabled()) {
              LOG.trace("op=" + op + ", startOpt=" + startOpt
                  + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
            }
            long inodeId = applyEditLogOp(op, fsDir, startOpt,
                in.getVersion(true), lastInodeId);
            if (lastInodeId < inodeId) {
              lastInodeId = inodeId;
            }
          } catch (RollingUpgradeOp.RollbackException e) {
            throw e;
          } catch (Throwable e) {
            LOG.error("Encountered exception on operation " + op, e);
            if (recovery == null) {
              throw e instanceof IOException ? (IOException) e :
                  new IOException(e);
            }

            MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
                "apply edit log operation " + op + ": error " +
                e.getMessage(), recovery, "applying edits");
          }
          // Now that the operation has been successfully decoded and
          // applied, update our bookkeeping.
          incrOpCount(op.opCode, opCounts, step, counter);
          if (op.hasTransactionId()) {
            lastAppliedTxId = op.getTransactionId();
            expectedTxId = lastAppliedTxId + 1;
          } else {
            expectedTxId = lastAppliedTxId = expectedStartingTxId;
          }
          // log progress
          if (op.hasTransactionId()) {
            long now = now();
            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
              int percent = Math.round((float) deltaTxId / numTxns * 100);
              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                  + " transactions completed. (" + percent + "%)");
              lastLogTime = now;
            }
          }
          numEdits++;
          totalEdits++;
        } catch (RollingUpgradeOp.RollbackException e) {
          LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
          break;
        } catch (MetaRecoveryContext.RequestStopException e) {
          MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
              in.getPosition() + "/" + in.length());
          break;
        }
      }
    } finally {
      fsNamesys.resetLastInodeId(lastInodeId);
      if (closeOnExit) {
        in.close();
      }
      fsDir.writeUnlock();
      fsNamesys.writeUnlock();

      if (LOG.isTraceEnabled()) {
        LOG.trace("replaying edit log finished");
      }

      if (FSImage.LOG.isDebugEnabled()) {
        dumpOpCounts(opCounts);
      }
    }
    return numEdits;
  }

  // Allocate a new inode ID if needed, and update the last allocated
  // inode ID.
  private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
      long lastInodeId) throws IOException {
    long inodeId = inodeIdFromOp;

    if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.ADD_INODE_ID, logVersion)) {
        throw new IOException("The layout version " + logVersion
            + " supports inodeId but gave bogus inodeId");
      }
      inodeId = fsNamesys.allocateNewInodeId();
    } else {
      // Need to reset lastInodeId: fsnamesys gets lastInodeId initially
      // from the fsimage, but the edit log captures more recent inodeId
      // allocations.
      if (inodeId > lastInodeId) {
        fsNamesys.resetLastInodeId(inodeId);
      }
    }
    return inodeId;
  }

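  /**
   * Apply a single edit log operation to the namespace. Returns the inode
   * ID allocated or recorded by the operation, or
   * INodeId.GRANDFATHER_INODE_ID if the operation does not allocate one.
   */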
319  @SuppressWarnings("deprecation")
320  private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
321      StartupOption startOpt, int logVersion, long lastInodeId) throws IOException {
322    long inodeId = INodeId.GRANDFATHER_INODE_ID;
323    if (LOG.isTraceEnabled()) {
324      LOG.trace("replaying edit log: " + op);
325    }
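    // Record RPC IDs in the retry cache while replaying, so that a client
    // retrying an already-applied call after a restart or failover gets the
    // original result instead of re-executing the operation.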
326    final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();
327    
328    switch (op.opCode) {
329    case OP_ADD: {
330      AddCloseOp addCloseOp = (AddCloseOp)op;
331      final String path =
332          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
333      if (FSNamesystem.LOG.isDebugEnabled()) {
334        FSNamesystem.LOG.debug(op.opCode + ": " + path +
335            " numblocks : " + addCloseOp.blocks.length +
336            " clientHolder " + addCloseOp.clientName +
337            " clientMachine " + addCloseOp.clientMachine);
338      }
      // There are three cases here:
      // 1. OP_ADD to create a new file
      // 2. OP_ADD to update file blocks
      // 3. OP_ADD to open file for append

      // See if the file already exists (persistBlocks call)
      final INodesInPath iip = fsDir.getINodesInPath(path, true);
      final INode[] inodes = iip.getINodes();
      INodeFile oldFile = INodeFile.valueOf(
          inodes[inodes.length - 1], path, true);
      if (oldFile != null && addCloseOp.overwrite) {
        // This is OP_ADD with overwrite
        fsDir.unprotectedDelete(path, addCloseOp.mtime);
        oldFile = null;
      }
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager()
            .adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
            lastInodeId);
        newFile = fsDir.unprotectedAddFile(inodeId,
            path, addCloseOp.permissions, addCloseOp.aclEntries,
            addCloseOp.xAttrs,
            replication, addCloseOp.mtime, addCloseOp.atime,
            addCloseOp.blockSize, true, addCloseOp.clientName,
            addCloseOp.clientMachine, addCloseOp.storagePolicyId);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);

        // add the op into retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
              HdfsFileStatus.EMPTY_NAME, newFile,
              BlockStoragePolicySuite.ID_UNSPECIFIED, Snapshot.CURRENT_STATE_ID,
              false, iip);
          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
              addCloseOp.rpcCallId, stat);
        }
      } else { // This is OP_ADD on an existing file
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file " +
                "for append");
          }
          LocatedBlock lb = fsNamesys.prepareFileForWrite(path,
              iip, addCloseOp.clientName, addCloseOp.clientMachine, false,
              false);
          newFile = INodeFile.valueOf(fsDir.getINode(path),
              path, true);

          // add the op into the retry cache if necessary
          if (toAddRetryCache) {
            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
                addCloseOp.rpcCallId, lb);
          }
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp) op;
      final String path =
          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }

      final INodesInPath iip = fsDir.getLastINodeInPath(path);
      final INodeFile file = INodeFile.valueOf(iip.getINode(0), path);

      // Update the salient file attributes.
      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
      file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
      updateBlocks(fsDir, addCloseOp, file);

      // Now close the file
      if (!file.isUnderConstruction() &&
          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (file.isUnderConstruction()) {
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
        file.toCompleteFile(file.getModificationTime());
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp) op;
      final String path =
          renameReservedPathsOnUpgrade(updateOp.path, logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " numblocks : " + updateOp.blocks.length);
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path),
          path);
      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, oldFile);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
      }
      break;
    }
    case OP_ADD_BLOCK: {
      AddBlockOp addBlockOp = (AddBlockOp) op;
      String path =
          renameReservedPathsOnUpgrade(addBlockOp.getPath(), logVersion);
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + path +
            " new block id : " + addBlockOp.getLastBlock().getBlockId());
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path);
      // add the new block to the INodeFile
      addNewBlock(fsDir, addBlockOp, oldFile);
      break;
    }
    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp) op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      fsDir.unprotectedSetReplication(
          renameReservedPathsOnUpgrade(setReplicationOp.path, logVersion),
          replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp) op;
      String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
      String[] srcs = new String[concatDeleteOp.srcs.length];
      for (int i = 0; i < srcs.length; i++) {
        srcs[i] =
            renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
      }
      fsDir.unprotectedConcat(trg, srcs, concatDeleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
            concatDeleteOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp) op;
      final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
      final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
      fsDir.unprotectedRenameTo(src, dst, renameOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp) op;
      fsDir.unprotectedDelete(
          renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
          deleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
      }
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp) op;
      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedMkdir(inodeId,
          renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
          mkdirOp.permissions, mkdirOp.aclEntries, mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP_V1: {
      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op) op;
      fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp) op;
      fsDir.unprotectedSetPermission(
          renameReservedPathsOnUpgrade(setPermissionsOp.src, logVersion),
          setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp) op;
      fsDir.unprotectedSetOwner(
          renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
          setOwnerOp.username, setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
          setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
          HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_SET_QUOTA: {
      SetQuotaOp setQuotaOp = (SetQuotaOp) op;
      fsDir.unprotectedSetQuota(
          renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
          setQuotaOp.nsQuota, setQuotaOp.dsQuota);
      break;
    }
    case OP_TIMES: {
      TimesOp timesOp = (TimesOp) op;
      fsDir.unprotectedSetTimes(
          renameReservedPathsOnUpgrade(timesOp.path, logVersion),
          timesOp.mtime, timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      if (!FileSystem.areSymlinksEnabled()) {
        throw new IOException("Symlinks not supported - please remove " +
            "symlink before upgrading to this version of HDFS");
      }
586      }
587      SymlinkOp symlinkOp = (SymlinkOp)op;
588      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
589          lastInodeId);
590      fsDir.unprotectedAddSymlink(inodeId,
591          renameReservedPathsOnUpgrade(symlinkOp.path, logVersion),
592          symlinkOp.value, symlinkOp.mtime, symlinkOp.atime,
593          symlinkOp.permissionStatus);
594      
595      if (toAddRetryCache) {
596        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
597      }
598      break;
599    }
600    case OP_RENAME: {
601      RenameOp renameOp = (RenameOp)op;
602      fsDir.unprotectedRenameTo(
603          renameReservedPathsOnUpgrade(renameOp.src, logVersion),
604          renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
605          renameOp.timestamp, renameOp.options);
606      
607      if (toAddRetryCache) {
608        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
609      }
610      break;
611    }
612    case OP_GET_DELEGATION_TOKEN: {
613      GetDelegationTokenOp getDelegationTokenOp
614        = (GetDelegationTokenOp)op;
615
616      fsNamesys.getDelegationTokenSecretManager()
617        .addPersistedDelegationToken(getDelegationTokenOp.token,
618                                     getDelegationTokenOp.expiryTime);
619      break;
620    }
621    case OP_RENEW_DELEGATION_TOKEN: {
622      RenewDelegationTokenOp renewDelegationTokenOp
623        = (RenewDelegationTokenOp)op;
624      fsNamesys.getDelegationTokenSecretManager()
625        .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
626                                     renewDelegationTokenOp.expiryTime);
627      break;
628    }
629    case OP_CANCEL_DELEGATION_TOKEN: {
630      CancelDelegationTokenOp cancelDelegationTokenOp
631        = (CancelDelegationTokenOp)op;
632      fsNamesys.getDelegationTokenSecretManager()
633          .updatePersistedTokenCancellation(
634              cancelDelegationTokenOp.token);
635      break;
636    }
637    case OP_UPDATE_MASTER_KEY: {
638      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
639      fsNamesys.getDelegationTokenSecretManager()
640        .updatePersistedMasterKey(updateMasterKeyOp.key);
641      break;
642    }
643    case OP_REASSIGN_LEASE: {
644      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;
645
646      Lease lease = fsNamesys.leaseManager.getLease(
647          reassignLeaseOp.leaseHolder);
648      final String path =
649          renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
650      INodeFile pendingFile = fsDir.getINode(path).asFile();
651      Preconditions.checkState(pendingFile.isUnderConstruction());
652      fsNamesys.reassignLeaseInternal(lease,
653          path, reassignLeaseOp.newHolder, pendingFile);
654      break;
655    }
656    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
      break;
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
              logVersion);
      String path = fsNamesys.getSnapshotManager().createSnapshot(
          snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
      }
      break;
    }
    case OP_DELETE_SNAPSHOT: {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ChunkedArrayList<INode>();
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().deleteSnapshot(
          snapshotRoot, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
      collectedBlocks.clear();
      fsNamesys.dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
            deleteSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().renameSnapshot(
          snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
            renameSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
      fsNamesys.getSnapshotManager().setSnapshottable(
          snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
      final String snapshotRoot =
          renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
              logVersion);
      fsNamesys.getSnapshotManager().resetSnapshottable(
          snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {
      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
      fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
      break;
    }
    case OP_ALLOCATE_BLOCK_ID: {
      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
      fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
      break;
    }
    case OP_ROLLING_UPGRADE_START: {
      if (startOpt == StartupOption.ROLLINGUPGRADE) {
        final RollingUpgradeStartupOption rollingUpgradeOpt
            = startOpt.getRollingUpgradeStartupOption();
        if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
          throw new RollingUpgradeOp.RollbackException();
        } else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
          // ignore the upgrade marker
          break;
        }
      }
      // start rolling upgrade
      final long startTime = ((RollingUpgradeOp) op).getTime();
      fsNamesys.startRollingUpgradeInternal(startTime);
      fsNamesys.triggerRollbackCheckpoint();
      break;
    }
    case OP_ROLLING_UPGRADE_FINALIZE: {
      final long finalizeTime = ((RollingUpgradeOp) op).getTime();
      if (fsNamesys.isRollingUpgrade()) {
        // Only do it when the NN is actually doing a rolling upgrade.
        // We can get FINALIZE without a corresponding START if the NN is
        // restarted before this op is consumed and a new checkpoint is
        // created.
        fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
      }
      fsNamesys.getFSImage().updateStorageVersion();
      fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
      break;
    }
    case OP_ADD_CACHE_DIRECTIVE: {
      AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op;
      CacheDirectiveInfo result = fsNamesys.getCacheManager()
          .addDirectiveFromEditLog(addOp.directive);
      if (toAddRetryCache) {
        Long id = result.getId();
        fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id);
      }
      break;
    }
    case OP_MODIFY_CACHE_DIRECTIVE: {
      ModifyCacheDirectiveInfoOp modifyOp =
          (ModifyCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().modifyDirectiveFromEditLog(
          modifyOp.directive);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_DIRECTIVE: {
      RemoveCacheDirectiveInfoOp removeOp =
          (RemoveCacheDirectiveInfoOp) op;
      fsNamesys.getCacheManager().removeDirective(removeOp.id, null);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_ADD_CACHE_POOL: {
      AddCachePoolOp addOp = (AddCachePoolOp) op;
      fsNamesys.getCacheManager().addCachePool(addOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_MODIFY_CACHE_POOL: {
      ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op;
      fsNamesys.getCacheManager().modifyCachePool(modifyOp.info);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_CACHE_POOL: {
      RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op;
      fsNamesys.getCacheManager().removeCachePool(removeOp.poolName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
      }
      break;
    }
    case OP_SET_ACL: {
      SetAclOp setAclOp = (SetAclOp) op;
      fsDir.unprotectedSetAcl(setAclOp.src, setAclOp.aclEntries);
      break;
    }
    case OP_SET_XATTR: {
      SetXAttrOp setXAttrOp = (SetXAttrOp) op;
      fsDir.unprotectedSetXAttrs(setXAttrOp.src, setXAttrOp.xAttrs,
          EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(setXAttrOp.rpcClientId, setXAttrOp.rpcCallId);
      }
      break;
    }
    case OP_REMOVE_XATTR: {
      RemoveXAttrOp removeXAttrOp = (RemoveXAttrOp) op;
      fsDir.unprotectedRemoveXAttrs(removeXAttrOp.src,
          removeXAttrOp.xAttrs);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(removeXAttrOp.rpcClientId,
            removeXAttrOp.rpcCallId);
      }
      break;
    }
    case OP_SET_STORAGE_POLICY: {
      SetStoragePolicyOp setStoragePolicyOp = (SetStoragePolicyOp) op;
      fsDir.unprotectedSetStoragePolicy(
          renameReservedPathsOnUpgrade(setStoragePolicyOp.path, logVersion),
          setStoragePolicyOp.policyId);
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
    return inodeId;
  }

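  /**
   * Build a diagnostic message describing where edit log replay failed,
   * including the stream offset and the offsets of recently read opcodes.
   */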
  private static String formatEditLogReplayError(EditLogInputStream in,
      long[] recentOpcodeOffsets, long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset ").append(in.getPosition());
    sb.append(".  Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

  /**
   * Add a new block into the given INodeFile.
   */
  private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file)
      throws IOException {
    BlockInfo[] oldBlocks = file.getBlocks();
    Block pBlock = op.getPenultimateBlock();
    Block newBlock = op.getLastBlock();

    if (pBlock != null) { // the penultimate block is not null
      Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0);
      // compare pBlock with the last block of oldBlocks
      Block oldLastBlock = oldBlocks[oldBlocks.length - 1];
      if (oldLastBlock.getBlockId() != pBlock.getBlockId()
          || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) {
        throw new IOException(
            "Mismatched block IDs or generation stamps for the old last block of file "
                + op.getPath() + ", the old last block is " + oldLastBlock
                + ", and the block read from editlog is " + pBlock);
      }

      oldLastBlock.setNumBytes(pBlock.getNumBytes());
      if (oldLastBlock instanceof BlockInfoUnderConstruction) {
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldLastBlock);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock);
      }
    } else { // the penultimate block is null
      Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0);
    }
    // add the new block
    BlockInfo newBI = new BlockInfoUnderConstruction(
        newBlock, file.getBlockReplication());
    fsNamesys.getBlockManager().addBlockCollection(newBI, file);
    file.addBlock(newBI);
    fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();

    // Are we only updating the last block's generation stamp?
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
          oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path, file, oldBlock);
      if (!removed && !(op instanceof UpdateBlocksOp)) {
973        throw new IOException("Trying to delete non-existant block " + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

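  /** Log a debug-level summary of how many times each opcode was applied. */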
  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n  ");
    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

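  /**
   * Increment both the per-opcode count used for the debug summary and the
   * startup progress counter for the current step.
   */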
  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
      Counter counter) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
    counter.increment();
  }

  /**
   * Throw an appropriate exception during upgrade from 203, when edit log
   * loading could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 release has conflicting opcodes with later releases.
    // The editlog must be emptied by restarting the namenode, before
    // proceeding with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old"
          + " release and restart the namenode. This empties the editlog"
          + " and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it.
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

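  /**
   * Find the last valid transaction ID in the stream, resyncing past any
   * corrupt regions. Currently this mirrors validateEditLog and fully
   * decodes each operation while scanning.
   */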
  static EditLogValidation scanEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) { // TODO
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

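  /**
   * Result of validating or scanning an edit log stream: the length of the
   * valid prefix, the highest transaction ID seen, and whether the stream
   * header was corrupt.
   */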
  static class EditLogValidation {
    private final long validLength;
    private final long endTxId;
    private final boolean hasCorruptHeader;

    EditLogValidation(long validLength, long endTxId,
        boolean hasCorruptHeader) {
      this.validLength = validLength;
      this.endTxId = endTxId;
      this.hasCorruptHeader = hasCorruptHeader;
    }

    long getValidLength() { return validLength; }

    long getEndTxId() { return endTxId; }

    boolean hasCorruptHeader() { return hasCorruptHeader; }
  }

  /**
   * Stream wrapper that keeps track of the current stream position.
   *
   * This stream also allows us to set a limit on how many bytes we can read
   * without getting an exception.
   */
  public static class PositionTrackingInputStream extends FilterInputStream
      implements StreamLimiter {
    private long curPos = 0;
    private long markPos = -1;
    private long limitPos = Long.MAX_VALUE;

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

    private void checkLimit(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to read " + amt + " byte(s) past " +
            "the limit at offset " + limitPos);
      }
    }

    @Override
    public int read() throws IOException {
      checkLimit(1);
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    @Override
    public int read(byte[] data) throws IOException {
      checkLimit(data.length);
      int ret = super.read(data);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public int read(byte[] data, int offset, int length) throws IOException {
      checkLimit(length);
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public void setLimit(long limit) {
      limitPos = curPos + limit;
    }

    @Override
    public void clearLimit() {
      limitPos = Long.MAX_VALUE;
    }

    @Override
    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    @Override
    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }

    @Override
    public long skip(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to skip " + extra + " bytes past " +
            "the limit at offset " + limitPos);
      }
      long ret = super.skip(amt);
      curPos += ret;
      return ret;
    }
  }

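  /** @return the transaction ID of the last edit applied by this loader. */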
  public long getLastAppliedTxId() {
    return lastAppliedTxId;
  }

  /**
   * Creates a Step used for updating startup progress, populated with
   * information from the given edits.  The step always includes the log's
   * name.  If the log has a known length, then the length is included in
   * the step too.
   *
   * @param edits EditLogInputStream to use for populating step
   * @return Step populated with information from edits
   * @throws IOException thrown if there is an I/O error
   */
  private static Step createStartupProgressStep(EditLogInputStream edits)
      throws IOException {
    long length = edits.length();
    String name = edits.getCurrentStreamName();
    return length != -1 ? new Step(name, length) : new Step(name);
  }
}