/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.util.Time.now;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.util.Holder;

import com.google.common.base.Joiner;

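/**
 * Reads edit log ops from an input stream and applies them to the
 * in-memory namespace, tracking the transaction ID of the last edit
 * that has been applied.
 */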
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec
  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;

  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      MetaRecoveryContext recovery) throws IOException {
    fsNamesys.writeLock();
    try {
      long startTime = now();
      long numEdits = loadEditRecords(edits, false,
                                 expectedStartingTxId, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (now()-startTime)/1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock();
    }
  }

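  /**
   * Read ops from the stream and apply them one at a time, under the
   * namesystem and directory write locks. In recovery mode, unreadable
   * ops and transaction-ID gaps prompt the operator instead of aborting
   * the load.
   * @return the number of edits applied
   */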
  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
                      long expectedStartingTxId, MetaRecoveryContext recovery)
      throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
      new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

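    // Keep a small ring buffer of the offsets of recently read opcodes,
    // so that replay error messages can report where in the stream we were.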
    long recentOpcodeOffsets[] = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    long lastLogTime = now();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(), e);
            String errorMessage =
              formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] =
            in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be a gap in the edit log.  We expected txid " +
                  expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "ignoring missing transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be an out-of-order edit in the edit log.  We " +
                  "expected txid " + expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "skipping the out-of-order edit");
              continue;
            }
          }
          try {
            applyEditLogOp(op, fsDir, in.getVersion());
          } catch (Throwable e) {
            LOG.error("Encountered exception on operation " + op, e);
            MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
                "apply edit log operation " + op + ": error " +
                e.getMessage(), recovery, "applying edits");
          }
          // Now that the operation has been successfully decoded and
          // applied, update our bookkeeping.
          incrOpCount(op.opCode, opCounts);
          if (op.hasTransactionId()) {
            lastAppliedTxId = op.getTransactionId();
            expectedTxId = lastAppliedTxId + 1;
          } else {
            expectedTxId = lastAppliedTxId = expectedStartingTxId;
          }
          // log progress
          if (op.hasTransactionId()) {
            long now = now();
            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
              int percent = Math.round((float) deltaTxId / numTxns * 100);
              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                  + " transactions completed. (" + percent + "%)");
              lastLogTime = now;
            }
          }
          numEdits++;
        } catch (MetaRecoveryContext.RequestStopException e) {
          MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
              in.getPosition() + "/" + in.length());
          break;
        }
      }
    } finally {
      if (closeOnExit) {
        in.close();
      }
      fsDir.writeUnlock();
      fsNamesys.writeUnlock();

      if (LOG.isTraceEnabled()) {
        LOG.trace("replaying edit log finished");
      }

      if (FSImage.LOG.isDebugEnabled()) {
        dumpOpCounts(opCounts);
      }
    }
    return numEdits;
  }

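  /**
   * Apply one edit log op to the in-memory namespace. The caller must hold
   * the namesystem and directory write locks.
   */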
  @SuppressWarnings("deprecation")
  private void applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
      int logVersion) throws IOException {

    if (LOG.isTraceEnabled()) {
      LOG.trace("replaying edit log: " + op);
    }

    switch (op.opCode) {
    case OP_ADD: {
      AddCloseOp addCloseOp = (AddCloseOp)op;
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }
      // There are three cases here:
      // 1. OP_ADD to create a new file
      // 2. OP_ADD to update file blocks
      // 3. OP_ADD to open file for append

      // See if the file already exists (persistBlocks call)
      INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path);
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager(
            ).adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        newFile = (INodeFile)fsDir.unprotectedAddFile(
            addCloseOp.path, addCloseOp.permissions,
            replication, addCloseOp.mtime,
            addCloseOp.atime, addCloseOp.blockSize,
            true, addCloseOp.clientName, addCloseOp.clientMachine);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);

      } else { // This is OP_ADD on an existing file
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file " +
                "for append");
          }
          fsNamesys.prepareFileForWrite(addCloseOp.path, oldFile,
              addCloseOp.clientName, addCloseOp.clientMachine, null,
              false);
          newFile = getINodeFile(fsDir, addCloseOp.path);
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime);
      newFile.setModificationTimeForce(addCloseOp.mtime);
      updateBlocks(fsDir, addCloseOp, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp)op;

      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }

      INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path);
      if (oldFile == null) {
        throw new IOException("Operation trying to close non-existent file " +
            addCloseOp.path);
      }

      // Update the salient file attributes.
      oldFile.setAccessTime(addCloseOp.atime);
      oldFile.setModificationTimeForce(addCloseOp.mtime);
      updateBlocks(fsDir, addCloseOp, oldFile);

      // Now close the file
      if (!oldFile.isUnderConstruction() &&
          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + addCloseOp.path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (oldFile.isUnderConstruction()) {
        INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile;
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path);
        INodeFile newFile = ucFile.convertToInodeFile();
        fsDir.replaceNode(addCloseOp.path, ucFile, newFile);
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp)op;
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path +
            " numblocks : " + updateOp.blocks.length);
      }
      INodeFile oldFile = getINodeFile(fsDir, updateOp.path);
      if (oldFile == null) {
        throw new IOException(
            "Operation trying to update blocks in non-existent file " +
            updateOp.path);
      }

      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, oldFile);
      break;
    }

    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp)op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      fsDir.unprotectedSetReplication(setReplicationOp.path,
                                      replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
      fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs,
          concatDeleteOp.timestamp);
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp)op;
      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
                                renameOp.timestamp);
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp)op;
      fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp);
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp)op;
      fsDir.unprotectedMkdir(mkdirOp.path, mkdirOp.permissions,
                             mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP: {
      SetGenstampOp setGenstampOp = (SetGenstampOp)op;
      fsNamesys.setGenerationStamp(setGenstampOp.genStamp);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
      fsDir.unprotectedSetPermission(setPermissionsOp.src,
                                     setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp)op;
      fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username,
                                setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
      fsDir.unprotectedSetQuota(setNSQuotaOp.src,
                                setNSQuotaOp.nsQuota,
                                HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
      fsDir.unprotectedSetQuota(clearNSQuotaOp.src,
                                HdfsConstants.QUOTA_RESET,
                                HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_SET_QUOTA: {
      SetQuotaOp setQuotaOp = (SetQuotaOp)op;
      fsDir.unprotectedSetQuota(setQuotaOp.src,
                                setQuotaOp.nsQuota,
                                setQuotaOp.dsQuota);
      break;
    }
    case OP_TIMES: {
      TimesOp timesOp = (TimesOp)op;
      fsDir.unprotectedSetTimes(timesOp.path,
                                timesOp.mtime,
                                timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      SymlinkOp symlinkOp = (SymlinkOp)op;
      fsDir.unprotectedAddSymlink(symlinkOp.path, symlinkOp.value,
                                  symlinkOp.mtime, symlinkOp.atime,
                                  symlinkOp.permissionStatus);
      break;
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp)op;
      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
                                renameOp.timestamp, renameOp.options);
      break;
    }
    case OP_GET_DELEGATION_TOKEN: {
      GetDelegationTokenOp getDelegationTokenOp
        = (GetDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .addPersistedDelegationToken(getDelegationTokenOp.token,
                                     getDelegationTokenOp.expiryTime);
      break;
    }
    case OP_RENEW_DELEGATION_TOKEN: {
      RenewDelegationTokenOp renewDelegationTokenOp
        = (RenewDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
                                     renewDelegationTokenOp.expiryTime);
      break;
    }
    case OP_CANCEL_DELEGATION_TOKEN: {
      CancelDelegationTokenOp cancelDelegationTokenOp
        = (CancelDelegationTokenOp)op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenCancellation(
              cancelDelegationTokenOp.token);
      break;
    }
    case OP_UPDATE_MASTER_KEY: {
      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
      fsNamesys.getDelegationTokenSecretManager()
        .updatePersistedMasterKey(updateMasterKeyOp.key);
      break;
    }
    case OP_REASSIGN_LEASE: {
      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;

      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
      INodeFileUnderConstruction pendingFile =
          INodeFileUnderConstruction.valueOf(
              fsDir.getINode(reassignLeaseOp.path), reassignLeaseOp.path);
      fsNamesys.reassignLeaseInternal(lease,
          reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
  }

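  /**
   * Build an error message describing where replay failed, including the
   * current stream offset and the offsets of recently read opcodes.
   */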
  private static String formatEditLogReplayError(EditLogInputStream in,
      long recentOpcodeOffsets[], long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset ").append(in.getPosition());
    sb.append(".  Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

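  /**
   * Look up the inode for a path, checking that if it exists it is a file.
   * @return the INodeFile at the given path, or null if none exists
   */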
  private static INodeFile getINodeFile(FSDirectory fsDir, String path)
      throws IOException {
    INode inode = fsDir.getINode(path);
    if (inode != null) {
      if (!(inode instanceof INodeFile)) {
        throw new IOException("Operation trying to get non-file " + path);
      }
    }
    return (INodeFile)inode;
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException if the new block list is inconsistent with the old one
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();

    // Are we only updating the last block's gen stamp?
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
        oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(
            (INodeFileUnderConstruction)file,
            (BlockInfoUnderConstruction)oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      fsDir.unprotectedRemoveBlock(path,
          (INodeFileUnderConstruction)file, oldBlocks[oldBlocks.length - 1]);
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

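  /**
   * Log a per-opcode summary of how many of each operation were loaded.
   */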
  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n  ");
    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

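  /**
   * Increment the count for the given opcode, creating the counter on
   * first use.
   */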
  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
  }

  /**
   * Throw appropriate exception during upgrade from 203, when editlog loading
   * could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 release has conflicting opcodes with the later releases.
    // The editlog must be emptied by restarting the namenode, before proceeding
    // with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old "
          + "release and restart the namenode. This empties the editlog "
          + "and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it, and swallows any
   * read errors it encounters rather than throwing them.
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

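  /**
   * Result of scanning an edit log stream: the length of the valid portion,
   * the highest transaction ID seen, and whether the log header was corrupt.
   */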
  static class EditLogValidation {
    private final long validLength;
    private final long endTxId;
    private final boolean hasCorruptHeader;

    EditLogValidation(long validLength, long endTxId,
        boolean hasCorruptHeader) {
      this.validLength = validLength;
      this.endTxId = endTxId;
      this.hasCorruptHeader = hasCorruptHeader;
    }

    long getValidLength() { return validLength; }

    long getEndTxId() { return endTxId; }

    boolean hasCorruptHeader() { return hasCorruptHeader; }
  }

  /**
   * Stream wrapper that keeps track of the current stream position.
   *
   * This stream also allows us to set a limit on how many bytes we can read
   * without getting an exception.
   */
  public static class PositionTrackingInputStream extends FilterInputStream
      implements StreamLimiter {
    private long curPos = 0;
    private long markPos = -1;
    private long limitPos = Long.MAX_VALUE;

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

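    /**
     * Throw if reading {@code amt} more bytes would move past the current
     * read limit.
     */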
    private void checkLimit(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to read " + amt + " byte(s) past " +
            "the limit at offset " + limitPos);
      }
    }

    @Override
    public int read() throws IOException {
      checkLimit(1);
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    @Override
    public int read(byte[] data) throws IOException {
      checkLimit(data.length);
      int ret = super.read(data);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public int read(byte[] data, int offset, int length) throws IOException {
      checkLimit(length);
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public void setLimit(long limit) {
      limitPos = curPos + limit;
    }

    @Override
    public void clearLimit() {
      limitPos = Long.MAX_VALUE;
    }

    @Override
    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    @Override
    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }

    @Override
    public long skip(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to skip " + extra + " bytes past " +
            "the limit at offset " + limitPos);
      }
      long ret = super.skip(amt);
      curPos += ret;
      return ret;
    }
  }

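  /**
   * @return the transaction ID of the last edit applied by this loader
   */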
  public long getLastAppliedTxId() {
    return lastAppliedTxId;
  }
}