001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
021    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
022    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
023    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
024    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
025    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
026    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
027    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
028    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
029    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
030    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
031    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
032    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
033    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
034    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
035    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
036    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
037    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
038    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
039    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
040    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
041    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
042    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
043    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
044    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
045    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
046    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
047    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
048    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
049    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
050    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
051    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
052    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
053    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
054    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
055    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
056    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
057    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
058    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
059    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
060    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
061    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
062    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
063    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
064    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
065    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
066    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
067    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
068    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
069    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
070    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
071    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
072    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
073    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
074    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
075    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
076    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
077    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
078    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
079    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
080    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
081    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
082    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
083    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
084    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
085    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
086    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
087    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
088    import static org.apache.hadoop.util.Time.now;
089    
090    import java.io.BufferedWriter;
091    import java.io.ByteArrayInputStream;
092    import java.io.DataInput;
093    import java.io.DataInputStream;
094    import java.io.DataOutputStream;
095    import java.io.File;
096    import java.io.FileNotFoundException;
097    import java.io.FileOutputStream;
098    import java.io.IOException;
099    import java.io.OutputStreamWriter;
100    import java.io.PrintWriter;
101    import java.io.StringWriter;
102    import java.lang.management.ManagementFactory;
103    import java.net.InetAddress;
104    import java.net.URI;
105    import java.util.ArrayList;
106    import java.util.Arrays;
107    import java.util.Collection;
108    import java.util.Collections;
109    import java.util.Date;
110    import java.util.EnumSet;
111    import java.util.HashMap;
112    import java.util.HashSet;
113    import java.util.Iterator;
114    import java.util.LinkedHashSet;
115    import java.util.List;
116    import java.util.Map;
117    import java.util.Set;
118    import java.util.concurrent.TimeUnit;
119    import java.util.concurrent.locks.ReentrantReadWriteLock;
120    
121    import javax.management.NotCompliantMBeanException;
122    import javax.management.ObjectName;
123    import javax.management.StandardMBean;
124    
125    import org.apache.commons.logging.Log;
126    import org.apache.commons.logging.LogFactory;
127    import org.apache.commons.logging.impl.Log4JLogger;
128    import org.apache.hadoop.HadoopIllegalArgumentException;
129    import org.apache.hadoop.classification.InterfaceAudience;
130    import org.apache.hadoop.conf.Configuration;
131    import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
132    import org.apache.hadoop.fs.CacheFlag;
133    import org.apache.hadoop.fs.ContentSummary;
134    import org.apache.hadoop.fs.CreateFlag;
135    import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
136    import org.apache.hadoop.fs.FileAlreadyExistsException;
137    import org.apache.hadoop.fs.FileStatus;
138    import org.apache.hadoop.fs.FileSystem;
139    import org.apache.hadoop.fs.FsServerDefaults;
140    import org.apache.hadoop.fs.InvalidPathException;
141    import org.apache.hadoop.fs.Options;
142    import org.apache.hadoop.fs.Options.Rename;
143    import org.apache.hadoop.fs.ParentNotDirectoryException;
144    import org.apache.hadoop.fs.Path;
145    import org.apache.hadoop.fs.UnresolvedLinkException;
146    import org.apache.hadoop.fs.permission.FsAction;
147    import org.apache.hadoop.fs.permission.FsPermission;
148    import org.apache.hadoop.fs.permission.PermissionStatus;
149    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
150    import org.apache.hadoop.ha.ServiceFailedException;
151    import org.apache.hadoop.hdfs.DFSConfigKeys;
152    import org.apache.hadoop.hdfs.DFSUtil;
153    import org.apache.hadoop.hdfs.HAUtil;
154    import org.apache.hadoop.hdfs.HdfsConfiguration;
155    import org.apache.hadoop.hdfs.StorageType;
156    import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
157    import org.apache.hadoop.hdfs.protocol.Block;
158    import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
159    import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
160    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
161    import org.apache.hadoop.hdfs.protocol.DatanodeID;
162    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
163    import org.apache.hadoop.hdfs.protocol.DirectoryListing;
164    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
165    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
166    import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
167    import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
168    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
169    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
170    import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
171    import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
172    import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
173    import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
174    import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
175    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
176    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
177    import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
178    import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
179    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
180    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
181    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
182    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
183    import org.apache.hadoop.hdfs.server.blockmanagement.*;
184    import org.apache.hadoop.hdfs.server.common.GenerationStamp;
185    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
186    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
187    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
188    import org.apache.hadoop.hdfs.server.common.Storage;
189    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
190    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
191    import org.apache.hadoop.hdfs.server.common.Util;
192    import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
193    import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
194    import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
195    import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
196    import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
197    import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
198    import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
199    import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
200    import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
201    import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
202    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
203    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo;
204    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
205    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
206    import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
207    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
208    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
209    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
210    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
211    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
212    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
213    import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
214    import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
215    import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
216    import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
217    import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
218    import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
219    import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
220    import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
221    import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
222    import org.apache.hadoop.hdfs.server.protocol.StorageReport;
223    import org.apache.hadoop.hdfs.util.ChunkedArrayList;
224    import org.apache.hadoop.io.IOUtils;
225    import org.apache.hadoop.io.Text;
226    import org.apache.hadoop.ipc.RetriableException;
227    import org.apache.hadoop.ipc.RetryCache;
228    import org.apache.hadoop.ipc.RetryCache.CacheEntry;
229    import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
230    import org.apache.hadoop.ipc.Server;
231    import org.apache.hadoop.ipc.StandbyException;
232    import org.apache.hadoop.metrics2.annotation.Metric;
233    import org.apache.hadoop.metrics2.annotation.Metrics;
234    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
235    import org.apache.hadoop.metrics2.util.MBeans;
236    import org.apache.hadoop.net.NetworkTopology;
237    import org.apache.hadoop.net.Node;
238    import org.apache.hadoop.security.AccessControlException;
239    import org.apache.hadoop.security.UserGroupInformation;
240    import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
241    import org.apache.hadoop.security.token.SecretManager.InvalidToken;
242    import org.apache.hadoop.security.token.Token;
243    import org.apache.hadoop.security.token.TokenIdentifier;
244    import org.apache.hadoop.security.token.delegation.DelegationKey;
245    import org.apache.hadoop.util.Daemon;
246    import org.apache.hadoop.util.DataChecksum;
247    import org.apache.hadoop.util.StringUtils;
248    import org.apache.hadoop.util.Time;
249    import org.apache.hadoop.util.VersionInfo;
250    import org.apache.log4j.Appender;
251    import org.apache.log4j.AsyncAppender;
252    import org.apache.log4j.Logger;
253    import org.mortbay.util.ajax.JSON;
254    
255    import com.google.common.annotations.VisibleForTesting;
256    import com.google.common.base.Charsets;
257    import com.google.common.base.Preconditions;
258    import com.google.common.collect.ImmutableMap;
259    import com.google.common.collect.Lists;
260    
261    /***************************************************
262     * FSNamesystem does the actual bookkeeping work for the
263     * DataNode.
264     *
265     * It tracks several important tables.
266     *
267     * 1)  valid fsname --> blocklist  (kept on disk, logged)
268     * 2)  Set of all valid blocks (inverted #1)
269     * 3)  block --> machinelist (kept in memory, rebuilt dynamically from reports)
270     * 4)  machine --> blocklist (inverted #2)
271     * 5)  LRU cache of updated-heartbeat machines
272     ***************************************************/
273    @InterfaceAudience.Private
274    @Metrics(context="dfs")
275    public class FSNamesystem implements Namesystem, FSClusterStats,
276        FSNamesystemMBean, NameNodeMXBean {
277      public static final Log LOG = LogFactory.getLog(FSNamesystem.class);
278    
279      private static final ThreadLocal<StringBuilder> auditBuffer =
280        new ThreadLocal<StringBuilder>() {
281          @Override
282          protected StringBuilder initialValue() {
283            return new StringBuilder();
284          }
285      };
286    
287      @VisibleForTesting
288      public boolean isAuditEnabled() {
289        return !isDefaultAuditLogger || auditLog.isInfoEnabled();
290      }
291    
292      private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
293          throws IOException {
294        return (isAuditEnabled() && isExternalInvocation())
295            ? dir.getFileInfo(path, resolveSymlink) : null;
296      }
297      
298      private void logAuditEvent(boolean succeeded, String cmd, String src)
299          throws IOException {
300        logAuditEvent(succeeded, cmd, src, null, null);
301      }
302      
303      private void logAuditEvent(boolean succeeded, String cmd, String src,
304          String dst, HdfsFileStatus stat) throws IOException {
305        if (isAuditEnabled() && isExternalInvocation()) {
306          logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
307                        cmd, src, dst, stat);
308        }
309      }
310    
311      private void logAuditEvent(boolean succeeded,
312          UserGroupInformation ugi, InetAddress addr, String cmd, String src,
313          String dst, HdfsFileStatus stat) {
314        FileStatus status = null;
315        if (stat != null) {
316          Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
317          Path path = dst != null ? new Path(dst) : new Path(src);
318          status = new FileStatus(stat.getLen(), stat.isDir(),
319              stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
320              stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
321              stat.getGroup(), symlink, path);
322        }
323        for (AuditLogger logger : auditLoggers) {
324          if (logger instanceof HdfsAuditLogger) {
325            HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
326            hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
327                status, ugi, dtSecretManager);
328          } else {
329            logger.logAuditEvent(succeeded, ugi.toString(), addr,
330                cmd, src, dst, status);
331          }
332        }
333      }
334    
335      /**
336       * Logger for audit events, noting successful FSNamesystem operations. Emits
337       * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
338       * <code>key=value</code> pairs to be written for the following properties:
339       * <code>
340       * ugi=&lt;ugi in RPC&gt;
341       * ip=&lt;remote IP&gt;
342       * cmd=&lt;command&gt;
343       * src=&lt;src path&gt;
344       * dst=&lt;dst path (optional)&gt;
345       * perm=&lt;permissions (optional)&gt;
346       * </code>
347       */
348      public static final Log auditLog = LogFactory.getLog(
349          FSNamesystem.class.getName() + ".audit");
350    
351      static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
352      static int BLOCK_DELETION_INCREMENT = 1000;
353      private final boolean isPermissionEnabled;
354      private final UserGroupInformation fsOwner;
355      private final String fsOwnerShortUserName;
356      private final String supergroup;
357      private final boolean standbyShouldCheckpoint;
358      
359      // Scan interval is not configurable.
360      private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
361        TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
362      final DelegationTokenSecretManager dtSecretManager;
363      private final boolean alwaysUseDelegationTokensForTests;
364    
365      private static final Step STEP_AWAITING_REPORTED_BLOCKS =
366        new Step(StepType.AWAITING_REPORTED_BLOCKS);
367    
368      // Tracks whether the default audit logger is the only configured audit
369      // logger; this allows isAuditEnabled() to return false in case the
370      // underlying logger is disabled, and avoid some unnecessary work.
371      private final boolean isDefaultAuditLogger;
372      private final List<AuditLogger> auditLoggers;
373    
374      /** The namespace tree. */
375      FSDirectory dir;
376      private final BlockManager blockManager;
377      private final SnapshotManager snapshotManager;
378      private final CacheManager cacheManager;
379      private final DatanodeStatistics datanodeStatistics;
380    
381      // Block pool ID used by this namenode
382      private String blockPoolId;
383    
384      final LeaseManager leaseManager = new LeaseManager(this); 
385    
386      volatile Daemon smmthread = null;  // SafeModeMonitor thread
387      
388      Daemon nnrmthread = null; // NamenodeResourceMonitor thread
389    
390      Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
391      /**
392       * When an active namenode will roll its own edit log, in # edits
393       */
394      private final long editLogRollerThreshold;
395      /**
396       * Check interval of an active namenode's edit log roller thread 
397       */
398      private final int editLogRollerInterval;
399    
400      private volatile boolean hasResourcesAvailable = false;
401      private volatile boolean fsRunning = true;
402      
403      /** The start time of the namesystem. */
404      private final long startTime = now();
405    
406      /** The interval of namenode checking for the disk space availability */
407      private final long resourceRecheckInterval;
408    
409      // The actual resource checker instance.
410      NameNodeResourceChecker nnResourceChecker;
411    
412      private final FsServerDefaults serverDefaults;
413      private final boolean supportAppends;
414      private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
415    
416      private volatile SafeModeInfo safeMode;  // safe mode information
417    
418      private final long maxFsObjects;          // maximum number of fs objects
419    
420      private final long minBlockSize;         // minimum block size
421      private final long maxBlocksPerFile;     // maximum # of blocks per file
422    
423      /**
424       * The global generation stamp for legacy blocks with randomly
425       * generated block IDs.
426       */
427      private final GenerationStamp generationStampV1 = new GenerationStamp();
428    
429      /**
430       * The global generation stamp for this file system.
431       */
432      private final GenerationStamp generationStampV2 = new GenerationStamp();
433    
434      /**
435       * The value of the generation stamp when the first switch to sequential
436       * block IDs was made. Blocks with generation stamps below this value
437       * have randomly allocated block IDs. Blocks with generation stamps above
438       * this value had sequentially allocated block IDs. Read from the fsImage
439       * (or initialized as an offset from the V1 (legacy) generation stamp on
440       * upgrade).
441       */
442      private long generationStampV1Limit =
443          GenerationStamp.GRANDFATHER_GENERATION_STAMP;
444    
445      /**
446       * The global block ID space for this file system.
447       */
448      @VisibleForTesting
449      private final SequentialBlockIdGenerator blockIdGenerator;
450    
451      // precision of access times.
452      private final long accessTimePrecision;
453    
454      /** Lock to protect FSNamesystem. */
455      private FSNamesystemLock fsLock;
456    
457      /**
458       * Used when this NN is in standby state to read from the shared edit log.
459       */
460      private EditLogTailer editLogTailer = null;
461    
462      /**
463       * Used when this NN is in standby state to perform checkpoints.
464       */
465      private StandbyCheckpointer standbyCheckpointer;
466    
467      /**
468       * Reference to the NN's HAContext object. This is only set once
469       * {@link #startCommonServices(Configuration, HAContext)} is called. 
470       */
471      private HAContext haContext;
472    
473      private final boolean haEnabled;
474      
475      /**
476       * Whether the namenode is in the middle of starting the active service
477       */
478      private volatile boolean startingActiveService = false;
479        
480      private INodeId inodeId;
481      
482      private final RetryCache retryCache;
483      
484      /**
485       * Set the last allocated inode id when fsimage or editlog is loaded. 
486       */
487      public void resetLastInodeId(long newValue) throws IOException {
488        try {
489          inodeId.skipTo(newValue);
490        } catch(IllegalStateException ise) {
491          throw new IOException(ise);
492        }
493      }
494    
495      /** Should only be used for tests to reset to any value */
496      void resetLastInodeIdWithoutChecking(long newValue) {
497        inodeId.setCurrentValue(newValue);
498      }
499      
500      /** @return the last inode ID. */
501      public long getLastInodeId() {
502        return inodeId.getCurrentValue();
503      }
504    
505      /** Allocate a new inode ID. */
506      public long allocateNewInodeId() {
507        return inodeId.nextValue();
508      }
509      
510      /**
511       * Clear all loaded data
512       */
513      void clear() {
514        dir.reset();
515        dtSecretManager.reset();
516        generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
517        generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
518        blockIdGenerator.setCurrentValue(
519            SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
520        generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
521        leaseManager.removeAllLeases();
522        inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
523        snapshotManager.clearSnapshottableDirs();
524        cacheManager.clear();
525      }
526    
527      @VisibleForTesting
528      LeaseManager getLeaseManager() {
529        return leaseManager;
530      }
531      
532      /**
533       * Check the supplied configuration for correctness.
534       * @param conf Supplies the configuration to validate.
535       * @throws IOException if the configuration could not be queried.
536       * @throws IllegalArgumentException if the configuration is invalid.
537       */
538      private static void checkConfiguration(Configuration conf)
539          throws IOException {
540    
541        final Collection<URI> namespaceDirs =
542            FSNamesystem.getNamespaceDirs(conf);
543        final Collection<URI> editsDirs =
544            FSNamesystem.getNamespaceEditsDirs(conf);
545        final Collection<URI> requiredEditsDirs =
546            FSNamesystem.getRequiredNamespaceEditsDirs(conf);
547        final Collection<URI> sharedEditsDirs =
548            FSNamesystem.getSharedEditsDirs(conf);
549    
550        for (URI u : requiredEditsDirs) {
551          if (u.toString().compareTo(
552                  DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
553            continue;
554          }
555    
556          // Each required directory must also be in editsDirs or in
557          // sharedEditsDirs.
558          if (!editsDirs.contains(u) &&
559              !sharedEditsDirs.contains(u)) {
560            throw new IllegalArgumentException(
561                "Required edits directory " + u.toString() + " not present in " +
562                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
563                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
564                editsDirs.toString() + "; " +
565                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
566                requiredEditsDirs.toString() + ". " +
567                DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
568                sharedEditsDirs.toString() + ".");
569          }
570        }
571    
572        if (namespaceDirs.size() == 1) {
573          LOG.warn("Only one image storage directory ("
574              + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of dataloss"
575              + " due to lack of redundant storage directories!");
576        }
577        if (editsDirs.size() == 1) {
578          LOG.warn("Only one namespace edits storage directory ("
579              + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of dataloss"
580              + " due to lack of redundant storage directories!");
581        }
582      }
583    
584      /**
585       * Instantiates an FSNamesystem loaded from the image and edits
586       * directories specified in the passed Configuration.
587       *
588       * @param conf the Configuration which specifies the storage directories
589       *             from which to load
590       * @return an FSNamesystem which contains the loaded namespace
591       * @throws IOException if loading fails
592       */
593      public static FSNamesystem loadFromDisk(Configuration conf)
594          throws IOException {
595    
596        checkConfiguration(conf);
597        FSImage fsImage = new FSImage(conf,
598            FSNamesystem.getNamespaceDirs(conf),
599            FSNamesystem.getNamespaceEditsDirs(conf));
600        FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
601        StartupOption startOpt = NameNode.getStartupOption(conf);
602        if (startOpt == StartupOption.RECOVER) {
603          namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
604        }
605    
606        long loadStart = now();
607        String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
608        try {
609          namesystem.loadFSImage(startOpt, fsImage,
610            HAUtil.isHAEnabled(conf, nameserviceId));
611        } catch (IOException ioe) {
612          LOG.warn("Encountered exception loading fsimage", ioe);
613          fsImage.close();
614          throw ioe;
615        }
616        long timeTakenToLoadFSImage = now() - loadStart;
617        LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
618        NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
619        if (nnMetrics != null) {
620          nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
621        }
622        return namesystem;
623      }
624      
625      FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
626        this(conf, fsImage, false);
627      }
628      
629      /**
630       * Create an FSNamesystem associated with the specified image.
631       * 
632       * Note that this does not load any data off of disk -- if you would
633       * like that behavior, use {@link #loadFromDisk(Configuration)}
634       *
635       * @param conf configuration
636       * @param fsImage The FSImage to associate with
637       * @param ignoreRetryCache Whether or not should ignore the retry cache setup
638       *                         step. For Secondary NN this should be set to true.
639       * @throws IOException on bad configuration
640       */
641      FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
642          throws IOException {
643        if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
644                            DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
645          LOG.info("Enabling async auditlog");
646          enableAsyncAuditLog();
647        }
648        boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
649        LOG.info("fsLock is fair:" + fair);
650        fsLock = new FSNamesystemLock(fair);
651        try {
652          resourceRecheckInterval = conf.getLong(
653              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
654              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);
655    
656          this.blockManager = new BlockManager(this, this, conf);
657          this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
658          this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);
659    
660          this.fsOwner = UserGroupInformation.getCurrentUser();
661          this.fsOwnerShortUserName = fsOwner.getShortUserName();
662          this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, 
663                                     DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
664          this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
665                                                     DFS_PERMISSIONS_ENABLED_DEFAULT);
666          LOG.info("fsOwner             = " + fsOwner);
667          LOG.info("supergroup          = " + supergroup);
668          LOG.info("isPermissionEnabled = " + isPermissionEnabled);
669    
670          // block allocation has to be persisted in HA using a shared edits directory
671          // so that the standby has up-to-date namespace information
672          String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
673          this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);  
674          
675          // Sanity check the HA-related config.
676          if (nameserviceId != null) {
677            LOG.info("Determined nameservice ID: " + nameserviceId);
678          }
679          LOG.info("HA Enabled: " + haEnabled);
680          if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
681            LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
682            throw new IOException("Invalid configuration: a shared edits dir " +
683                "must not be specified if HA is not enabled.");
684          }
685    
686          // Get the checksum type from config
687          String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
688          DataChecksum.Type checksumType;
689          try {
690             checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
691          } catch (IllegalArgumentException iae) {
692             throw new IOException("Invalid checksum type in "
693                + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
694          }
695    
696          this.serverDefaults = new FsServerDefaults(
697              conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
698              conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
699              conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
700              (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
701              conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
702              conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
703              conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
704              checksumType);
705          
706          this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, 
707                                           DFS_NAMENODE_MAX_OBJECTS_DEFAULT);
708    
709          this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
710              DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
711          this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
712              DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
713          this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
714              DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
715          this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
716          LOG.info("Append Enabled: " + supportAppends);
717    
718          this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);
719          
720          this.standbyShouldCheckpoint = conf.getBoolean(
721              DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
722          // # edit autoroll threshold is a multiple of the checkpoint threshold 
723          this.editLogRollerThreshold = (long)
724              (conf.getFloat(
725                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
726                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
727              conf.getLong(
728                  DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
729                  DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
730          this.editLogRollerInterval = conf.getInt(
731              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
732              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
733          this.inodeId = new INodeId();
734          
735          // For testing purposes, allow the DT secret manager to be started regardless
736          // of whether security is enabled.
737          alwaysUseDelegationTokensForTests = conf.getBoolean(
738              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
739              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
740    
741          this.dtSecretManager = createDelegationTokenSecretManager(conf);
742          this.dir = new FSDirectory(fsImage, this, conf);
743          this.snapshotManager = new SnapshotManager(dir);
744          this.cacheManager = new CacheManager(this, conf, blockManager);
745          this.safeMode = new SafeModeInfo(conf);
746          this.auditLoggers = initAuditLoggers(conf);
747          this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
748            auditLoggers.get(0) instanceof DefaultAuditLogger;
749          this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
750        } catch(IOException e) {
751          LOG.error(getClass().getSimpleName() + " initialization failed.", e);
752          close();
753          throw e;
754        } catch (RuntimeException re) {
755          LOG.error(getClass().getSimpleName() + " initialization failed.", re);
756          close();
757          throw re;
758        }
759      }
760      
761      @VisibleForTesting
762      public RetryCache getRetryCache() {
763        return retryCache;
764      }
765      
766      /** Whether or not retry cache is enabled */
767      boolean hasRetryCache() {
768        return retryCache != null;
769      }
770      
771      void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
772        if (retryCache != null) {
773          retryCache.addCacheEntryWithPayload(clientId, callId, payload);
774        }
775      }
776      
777      void addCacheEntry(byte[] clientId, int callId) {
778        if (retryCache != null) {
779          retryCache.addCacheEntry(clientId, callId);
780        }
781      }
782      
783      @VisibleForTesting
784      static RetryCache initRetryCache(Configuration conf) {
785        boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
786            DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
787        LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
788        if (enable) {
789          float heapPercent = conf.getFloat(
790              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
791              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
792          long entryExpiryMillis = conf.getLong(
793              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
794              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
795          LOG.info("Retry cache will use " + heapPercent
796              + " of total heap and retry cache entry expiry time is "
797              + entryExpiryMillis + " millis");
798          long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
799          return new RetryCache("Namenode Retry Cache", heapPercent,
800              entryExpiryNanos);
801        }
802        return null;
803      }
804    
805      private List<AuditLogger> initAuditLoggers(Configuration conf) {
806        // Initialize the custom access loggers if configured.
807        Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
808        List<AuditLogger> auditLoggers = Lists.newArrayList();
809        if (alClasses != null && !alClasses.isEmpty()) {
810          for (String className : alClasses) {
811            try {
812              AuditLogger logger;
813              if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
814                logger = new DefaultAuditLogger();
815              } else {
816                logger = (AuditLogger) Class.forName(className).newInstance();
817              }
818              logger.initialize(conf);
819              auditLoggers.add(logger);
820            } catch (RuntimeException re) {
821              throw re;
822            } catch (Exception e) {
823              throw new RuntimeException(e);
824            }
825          }
826        }
827    
828        // Make sure there is at least one logger installed.
829        if (auditLoggers.isEmpty()) {
830          auditLoggers.add(new DefaultAuditLogger());
831        }
832        return Collections.unmodifiableList(auditLoggers);
833      }
834    
835      void loadFSImage(StartupOption startOpt, FSImage fsImage, boolean haEnabled)
836          throws IOException {
837        // format before starting up if requested
838        if (startOpt == StartupOption.FORMAT) {
839          
840          fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id
841    
842          startOpt = StartupOption.REGULAR;
843        }
844        boolean success = false;
845        writeLock();
846        try {
847          // We shouldn't be calling saveNamespace if we've come up in standby state.
848          MetaRecoveryContext recovery = startOpt.createRecoveryContext();
849          boolean needToSave =
850            fsImage.recoverTransitionRead(startOpt, this, recovery) && !haEnabled;
851          if (needToSave) {
852            fsImage.saveNamespace(this);
853          } else {
854            // No need to save, so mark the phase done.
855            StartupProgress prog = NameNode.getStartupProgress();
856            prog.beginPhase(Phase.SAVING_CHECKPOINT);
857            prog.endPhase(Phase.SAVING_CHECKPOINT);
858          }
859          // This will start a new log segment and write to the seen_txid file, so
860          // we shouldn't do it when coming up in standby state
861          if (!haEnabled) {
862            fsImage.openEditLogForWrite();
863          }
864          success = true;
865        } finally {
866          if (!success) {
867            fsImage.close();
868          }
869          writeUnlock();
870        }
871        dir.imageLoadComplete();
872      }
873    
874      private void startSecretManager() {
875        if (dtSecretManager != null) {
876          try {
877            dtSecretManager.startThreads();
878          } catch (IOException e) {
879            // Inability to start secret manager
880            // can't be recovered from.
881            throw new RuntimeException(e);
882          }
883        }
884      }
885      
886      private void startSecretManagerIfNecessary() {
887        boolean shouldRun = shouldUseDelegationTokens() &&
888          !isInSafeMode() && getEditLog().isOpenForWrite();
889        boolean running = dtSecretManager.isRunning();
890        if (shouldRun && !running) {
891          startSecretManager();
892        }
893      }
894    
895      private void stopSecretManager() {
896        if (dtSecretManager != null) {
897          dtSecretManager.stopThreads();
898        }
899      }
900      
901      /** 
902       * Start services common to both active and standby states
903       * @param haContext 
904       * @throws IOException
905       */
906      void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
907        this.registerMBean(); // register the MBean for the FSNamesystemState
908        writeLock();
909        this.haContext = haContext;
910        try {
911          nnResourceChecker = new NameNodeResourceChecker(conf);
912          checkAvailableResources();
913          assert safeMode != null &&
914            !safeMode.isPopulatingReplQueues();
915          StartupProgress prog = NameNode.getStartupProgress();
916          prog.beginPhase(Phase.SAFEMODE);
917          prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
918            getCompleteBlocksTotal());
919          setBlockTotal();
920          blockManager.activate(conf);
921        } finally {
922          writeUnlock();
923        }
924        
925        registerMXBean();
926        DefaultMetricsSystem.instance().register(this);
927      }
928      
929      /** 
930       * Stop services common to both active and standby states
931       * @throws IOException
932       */
933      void stopCommonServices() {
934        writeLock();
935        try {
936          if (blockManager != null) blockManager.close();
937        } finally {
938          writeUnlock();
939        }
940        RetryCache.clear(retryCache);
941      }
942      
943      /**
944       * Start services required in active state
945       * @throws IOException
946       */
947      void startActiveServices() throws IOException {
948        startingActiveService = true;
949        LOG.info("Starting services required for active state");
950        writeLock();
951        try {
952          FSEditLog editLog = dir.fsImage.getEditLog();
953          
954          if (!editLog.isOpenForWrite()) {
955            // During startup, we're already open for write during initialization.
956            editLog.initJournalsForWrite();
957            // May need to recover
958            editLog.recoverUnclosedStreams();
959            
960            LOG.info("Catching up to latest edits from old active before " +
961                "taking over writer role in edits logs");
962            editLogTailer.catchupDuringFailover();
963            
964            blockManager.setPostponeBlocksFromFuture(false);
965            blockManager.getDatanodeManager().markAllDatanodesStale();
966            blockManager.clearQueues();
967            blockManager.processAllPendingDNMessages();
968    
969            if (!isInSafeMode() ||
970                (isInSafeMode() && safeMode.isPopulatingReplQueues())) {
971              LOG.info("Reprocessing replication and invalidation queues");
972              blockManager.processMisReplicatedBlocks();
973            }
974            
975            if (LOG.isDebugEnabled()) {
976              LOG.debug("NameNode metadata after re-processing " +
977                  "replication and invalidation queues during failover:\n" +
978                  metaSaveAsString());
979            }
980            
981            long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
982            LOG.info("Will take over writing edit logs at txnid " + 
983                nextTxId);
984            editLog.setNextTxId(nextTxId);
985    
986            dir.fsImage.editLog.openForWrite();
987          }
988          if (haEnabled) {
989            // Renew all of the leases before becoming active.
990            // This is because, while we were in standby mode,
991            // the leases weren't getting renewed on this NN.
992            // Give them all a fresh start here.
993            leaseManager.renewAllLeases();
994          }
995          leaseManager.startMonitor();
996          startSecretManagerIfNecessary();
997    
998          //ResourceMonitor required only at ActiveNN. See HDFS-2914
999          this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
1000          nnrmthread.start();
1001    
1002          nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
1003              editLogRollerThreshold, editLogRollerInterval));
1004          nnEditLogRoller.start();
1005    
1006          cacheManager.startMonitorThread();
1007          blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
1008        } finally {
1009          writeUnlock();
1010          startingActiveService = false;
1011        }
1012      }
1013      
1014      /**
1015       * @return Whether the namenode is transitioning to active state and is in the
1016       *         middle of the {@link #startActiveServices()}
1017       */
1018      public boolean inTransitionToActive() {
1019        return haEnabled && haContext != null
1020            && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1021            && startingActiveService;
1022      }
1023    
1024      private boolean shouldUseDelegationTokens() {
1025        return UserGroupInformation.isSecurityEnabled() ||
1026          alwaysUseDelegationTokensForTests;
1027      }
1028    
1029      /** 
1030       * Stop services required in active state
1031       * @throws InterruptedException
1032       */
1033      void stopActiveServices() {
1034        LOG.info("Stopping services started for active state");
1035        writeLock();
1036        try {
1037          stopSecretManager();
1038          if (leaseManager != null) {
1039            leaseManager.stopMonitor();
1040          }
1041          if (nnrmthread != null) {
1042            ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
1043            nnrmthread.interrupt();
1044          }
1045          if (nnEditLogRoller != null) {
1046            ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
1047            nnEditLogRoller.interrupt();
1048          }
1049          if (dir != null && dir.fsImage != null) {
1050            if (dir.fsImage.editLog != null) {
1051              dir.fsImage.editLog.close();
1052            }
1053            // Update the fsimage with the last txid that we wrote
1054            // so that the tailer starts from the right spot.
1055            dir.fsImage.updateLastAppliedTxIdFromWritten();
1056          }
1057          cacheManager.stopMonitorThread();
1058          cacheManager.clearDirectiveStats();
1059          blockManager.getDatanodeManager().clearPendingCachingCommands();
1060          blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
1061        } finally {
1062          writeUnlock();
1063        }
1064      }
1065      
1066      /**
1067       * Start services required in standby state 
1068       * 
1069       * @throws IOException
1070       */
1071      void startStandbyServices(final Configuration conf) throws IOException {
1072        LOG.info("Starting services required for standby state");
1073        if (!dir.fsImage.editLog.isOpenForRead()) {
1074          // During startup, we're already open for read.
1075          dir.fsImage.editLog.initSharedJournalsForRead();
1076        }
1077        
1078        blockManager.setPostponeBlocksFromFuture(true);
1079    
1080        editLogTailer = new EditLogTailer(this, conf);
1081        editLogTailer.start();
1082        if (standbyShouldCheckpoint) {
1083          standbyCheckpointer = new StandbyCheckpointer(conf, this);
1084          standbyCheckpointer.start();
1085        }
1086      }
1087    
1088    
1089      /**
1090       * Called while the NN is in Standby state, but just about to be
1091       * asked to enter Active state. This cancels any checkpoints
1092       * currently being taken.
1093       */
1094      void prepareToStopStandbyServices() throws ServiceFailedException {
1095        if (standbyCheckpointer != null) {
1096          standbyCheckpointer.cancelAndPreventCheckpoints(
1097              "About to leave standby state");
1098        }
1099      }
1100    
1101      /** Stop services required in standby state */
1102      void stopStandbyServices() throws IOException {
1103        LOG.info("Stopping services started for standby state");
1104        if (standbyCheckpointer != null) {
1105          standbyCheckpointer.stop();
1106        }
1107        if (editLogTailer != null) {
1108          editLogTailer.stop();
1109        }
1110        if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) {
1111          dir.fsImage.editLog.close();
1112        }
1113      }
1114      
1115      @Override
1116      public void checkOperation(OperationCategory op) throws StandbyException {
1117        if (haContext != null) {
1118          // null in some unit tests
1119          haContext.checkOperation(op);
1120        }
1121      }
1122      
1123      /**
1124       * @throws RetriableException
1125       *           If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
1126       *           NameNode is in active state
1127       * @throws SafeModeException
1128       *           Otherwise if NameNode is in SafeMode.
1129       */
1130      private void checkNameNodeSafeMode(String errorMsg)
1131          throws RetriableException, SafeModeException {
1132        if (isInSafeMode()) {
1133          SafeModeException se = new SafeModeException(errorMsg, safeMode);
1134          if (haEnabled && haContext != null
1135              && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1136              && shouldRetrySafeMode(this.safeMode)) {
1137            throw new RetriableException(se);
1138          } else {
1139            throw se;
1140          }
1141        }
1142      }
1143      
1144      /**
1145       * We already know that the safemode is on. We will throw a RetriableException
1146       * if the safemode is not manual or caused by low resource.
1147       */
1148      private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
1149        if (safeMode == null) {
1150          return false;
1151        } else {
1152          return !safeMode.isManual() && !safeMode.areResourcesLow();
1153        }
1154      }
1155      
1156      public static Collection<URI> getNamespaceDirs(Configuration conf) {
1157        return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
1158      }
1159    
1160      /**
1161       * Get all edits dirs which are required. If any shared edits dirs are
1162       * configured, these are also included in the set of required dirs.
1163       * 
1164       * @param conf the HDFS configuration.
1165       * @return all required dirs.
1166       */
1167      public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
1168        Set<URI> ret = new HashSet<URI>();
1169        ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
1170        ret.addAll(getSharedEditsDirs(conf));
1171        return ret;
1172      }
1173    
1174      private static Collection<URI> getStorageDirs(Configuration conf,
1175                                                    String propertyName) {
1176        Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
1177        StartupOption startOpt = NameNode.getStartupOption(conf);
1178        if(startOpt == StartupOption.IMPORT) {
1179          // In case of IMPORT this will get rid of default directories 
1180          // but will retain directories specified in hdfs-site.xml
1181          // When importing image from a checkpoint, the name-node can
1182          // start with empty set of storage directories.
1183          Configuration cE = new HdfsConfiguration(false);
1184          cE.addResource("core-default.xml");
1185          cE.addResource("core-site.xml");
1186          cE.addResource("hdfs-default.xml");
1187          Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
1188          dirNames.removeAll(dirNames2);
1189          if(dirNames.isEmpty())
1190            LOG.warn("!!! WARNING !!!" +
1191              "\n\tThe NameNode currently runs without persistent storage." +
1192              "\n\tAny changes to the file system meta-data may be lost." +
1193              "\n\tRecommended actions:" +
1194              "\n\t\t- shutdown and restart NameNode with configured \"" 
1195              + propertyName + "\" in hdfs-site.xml;" +
1196              "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
1197              "of the file system meta-data.");
1198        } else if (dirNames.isEmpty()) {
1199          dirNames = Collections.singletonList(
1200              DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
1201        }
1202        return Util.stringCollectionAsURIs(dirNames);
1203      }
1204    
1205      /**
1206       * Return an ordered list of edits directories to write to.
1207       * The list is ordered such that all shared edits directories
1208       * are ordered before non-shared directories, and any duplicates
1209       * are removed. The order they are specified in the configuration
1210       * is retained.
1211       * @return Collection of shared edits directories.
1212       * @throws IOException if multiple shared edits directories are configured
1213       */
1214      public static List<URI> getNamespaceEditsDirs(Configuration conf)
1215          throws IOException {
1216        return getNamespaceEditsDirs(conf, true);
1217      }
1218      
1219      public static List<URI> getNamespaceEditsDirs(Configuration conf,
1220          boolean includeShared)
1221          throws IOException {
1222        // Use a LinkedHashSet so that order is maintained while we de-dup
1223        // the entries.
1224        LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();
1225        
1226        if (includeShared) {
1227          List<URI> sharedDirs = getSharedEditsDirs(conf);
1228      
1229          // Fail until multiple shared edits directories are supported (HDFS-2782)
1230          if (sharedDirs.size() > 1) {
1231            throw new IOException(
1232                "Multiple shared edits directories are not yet supported");
1233          }
1234      
1235          // First add the shared edits dirs. It's critical that the shared dirs
1236          // are added first, since JournalSet syncs them in the order they are listed,
1237          // and we need to make sure all edits are in place in the shared storage
1238          // before they are replicated locally. See HDFS-2874.
1239          for (URI dir : sharedDirs) {
1240            if (!editsDirs.add(dir)) {
1241              LOG.warn("Edits URI " + dir + " listed multiple times in " + 
1242                  DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
1243            }
1244          }
1245        }    
1246        // Now add the non-shared dirs.
1247        for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
1248          if (!editsDirs.add(dir)) {
1249            LOG.warn("Edits URI " + dir + " listed multiple times in " + 
1250                DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
1251                DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
1252          }
1253        }
1254    
1255        if (editsDirs.isEmpty()) {
1256          // If this is the case, no edit dirs have been explicitly configured.
1257          // Image dirs are to be used for edits too.
1258          return Lists.newArrayList(getNamespaceDirs(conf));
1259        } else {
1260          return Lists.newArrayList(editsDirs);
1261        }
1262      }
1263      
1264      /**
1265       * Returns edit directories that are shared between primary and secondary.
1266       * @param conf
1267       * @return Collection of edit directories.
1268       */
1269      public static List<URI> getSharedEditsDirs(Configuration conf) {
1270        // don't use getStorageDirs here, because we want an empty default
1271        // rather than the dir in /tmp
1272        Collection<String> dirNames = conf.getTrimmedStringCollection(
1273            DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
1274        return Util.stringCollectionAsURIs(dirNames);
1275      }
1276    
1277      @Override
1278      public void readLock() {
1279        this.fsLock.readLock().lock();
1280      }
1281      @Override
1282      public void readUnlock() {
1283        this.fsLock.readLock().unlock();
1284      }
1285      @Override
1286      public void writeLock() {
1287        this.fsLock.writeLock().lock();
1288      }
1289      @Override
1290      public void writeLockInterruptibly() throws InterruptedException {
1291        this.fsLock.writeLock().lockInterruptibly();
1292      }
1293      @Override
1294      public void writeUnlock() {
1295        this.fsLock.writeLock().unlock();
1296      }
1297      @Override
1298      public boolean hasWriteLock() {
1299        return this.fsLock.isWriteLockedByCurrentThread();
1300      }
1301      @Override
1302      public boolean hasReadLock() {
1303        return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
1304      }
1305    
1306      public int getReadHoldCount() {
1307        return this.fsLock.getReadHoldCount();
1308      }
1309    
1310      public int getWriteHoldCount() {
1311        return this.fsLock.getWriteHoldCount();
1312      }
1313    
1314      NamespaceInfo getNamespaceInfo() {
1315        readLock();
1316        try {
1317          return unprotectedGetNamespaceInfo();
1318        } finally {
1319          readUnlock();
1320        }
1321      }
1322    
1323      /**
1324       * Version of @see #getNamespaceInfo() that is not protected by a lock.
1325       */
1326      NamespaceInfo unprotectedGetNamespaceInfo() {
1327        return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(),
1328            getClusterId(), getBlockPoolId(),
1329            dir.fsImage.getStorage().getCTime());
1330      }
1331    
1332      /**
1333       * Close down this file system manager.
1334       * Causes heartbeat and lease daemons to stop; waits briefly for
1335       * them to finish, but a short timeout returns control back to caller.
1336       */
1337      void close() {
1338        fsRunning = false;
1339        try {
1340          stopCommonServices();
1341          if (smmthread != null) smmthread.interrupt();
1342        } finally {
1343          // using finally to ensure we also wait for lease daemon
1344          try {
1345            stopActiveServices();
1346            stopStandbyServices();
1347            if (dir != null) {
1348              dir.close();
1349            }
1350          } catch (IOException ie) {
1351            LOG.error("Error closing FSDirectory", ie);
1352            IOUtils.cleanup(LOG, dir);
1353          }
1354        }
1355      }
1356    
1357      @Override
1358      public boolean isRunning() {
1359        return fsRunning;
1360      }
1361      
1362      @Override
1363      public boolean isInStandbyState() {
1364        if (haContext == null || haContext.getState() == null) {
1365          // We're still starting up. In this case, if HA is
1366          // on for the cluster, we always start in standby. Otherwise
1367          // start in active.
1368          return haEnabled;
1369        }
1370    
1371        return HAServiceState.STANDBY == haContext.getState().getServiceState();
1372      }
1373    
1374      /**
1375       * Dump all metadata into specified file
1376       */
1377      void metaSave(String filename) throws IOException {
1378        checkSuperuserPrivilege();
1379        checkOperation(OperationCategory.UNCHECKED);
1380        writeLock();
1381        try {
1382          checkOperation(OperationCategory.UNCHECKED);
1383          File file = new File(System.getProperty("hadoop.log.dir"), filename);
1384          PrintWriter out = new PrintWriter(new BufferedWriter(
1385              new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
1386          metaSave(out);
1387          out.flush();
1388          out.close();
1389        } finally {
1390          writeUnlock();
1391        }
1392      }
1393    
1394      private void metaSave(PrintWriter out) {
1395        assert hasWriteLock();
1396        long totalInodes = this.dir.totalInodes();
1397        long totalBlocks = this.getBlocksTotal();
1398        out.println(totalInodes + " files and directories, " + totalBlocks
1399            + " blocks = " + (totalInodes + totalBlocks) + " total");
1400    
1401        blockManager.metaSave(out);
1402      }
1403    
1404      private String metaSaveAsString() {
1405        StringWriter sw = new StringWriter();
1406        PrintWriter pw = new PrintWriter(sw);
1407        metaSave(pw);
1408        pw.flush();
1409        return sw.toString();
1410      }
1411      
1412    
1413      long getDefaultBlockSize() {
1414        return serverDefaults.getBlockSize();
1415      }
1416    
1417      FsServerDefaults getServerDefaults() throws StandbyException {
1418        checkOperation(OperationCategory.READ);
1419        return serverDefaults;
1420      }
1421    
1422      long getAccessTimePrecision() {
1423        return accessTimePrecision;
1424      }
1425    
1426      private boolean isAccessTimeSupported() {
1427        return accessTimePrecision > 0;
1428      }
1429    
1430      /////////////////////////////////////////////////////////
1431      //
1432      // These methods are called by HadoopFS clients
1433      //
1434      /////////////////////////////////////////////////////////
1435      /**
1436       * Set permissions for an existing file.
1437       * @throws IOException
1438       */
1439      void setPermission(String src, FsPermission permission)
1440          throws AccessControlException, FileNotFoundException, SafeModeException,
1441          UnresolvedLinkException, IOException {
1442        try {
1443          setPermissionInt(src, permission);
1444        } catch (AccessControlException e) {
1445          logAuditEvent(false, "setPermission", src);
1446          throw e;
1447        }
1448      }
1449    
1450      private void setPermissionInt(String src, FsPermission permission)
1451          throws AccessControlException, FileNotFoundException, SafeModeException,
1452          UnresolvedLinkException, IOException {
1453        HdfsFileStatus resultingStat = null;
1454        FSPermissionChecker pc = getPermissionChecker();
1455        checkOperation(OperationCategory.WRITE);
1456        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1457        writeLock();
1458        try {
1459          checkOperation(OperationCategory.WRITE);
1460          checkNameNodeSafeMode("Cannot set permission for " + src);
1461          src = FSDirectory.resolvePath(src, pathComponents, dir);
1462          checkOwner(pc, src);
1463          dir.setPermission(src, permission);
1464          resultingStat = getAuditFileInfo(src, false);
1465        } finally {
1466          writeUnlock();
1467        }
1468        getEditLog().logSync();
1469        logAuditEvent(true, "setPermission", src, null, resultingStat);
1470      }
1471    
1472      /**
1473       * Set owner for an existing file.
1474       * @throws IOException
1475       */
1476      void setOwner(String src, String username, String group)
1477          throws AccessControlException, FileNotFoundException, SafeModeException,
1478          UnresolvedLinkException, IOException {
1479        try {
1480          setOwnerInt(src, username, group);
1481        } catch (AccessControlException e) {
1482          logAuditEvent(false, "setOwner", src);
1483          throw e;
1484        } 
1485      }
1486    
1487      private void setOwnerInt(String src, String username, String group)
1488          throws AccessControlException, FileNotFoundException, SafeModeException,
1489          UnresolvedLinkException, IOException {
1490        HdfsFileStatus resultingStat = null;
1491        FSPermissionChecker pc = getPermissionChecker();
1492        checkOperation(OperationCategory.WRITE);
1493        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1494        writeLock();
1495        try {
1496          checkOperation(OperationCategory.WRITE);
1497          checkNameNodeSafeMode("Cannot set owner for " + src);
1498          src = FSDirectory.resolvePath(src, pathComponents, dir);
1499          checkOwner(pc, src);
1500          if (!pc.isSuperUser()) {
1501            if (username != null && !pc.getUser().equals(username)) {
1502              throw new AccessControlException("Non-super user cannot change owner");
1503            }
1504            if (group != null && !pc.containsGroup(group)) {
1505              throw new AccessControlException("User does not belong to " + group);
1506            }
1507          }
1508          dir.setOwner(src, username, group);
1509          resultingStat = getAuditFileInfo(src, false);
1510        } finally {
1511          writeUnlock();
1512        }
1513        getEditLog().logSync();
1514        logAuditEvent(true, "setOwner", src, null, resultingStat);
1515      }
1516    
1517      /**
1518       * Get block locations within the specified range.
1519       * @see ClientProtocol#getBlockLocations(String, long, long)
1520       */
1521      LocatedBlocks getBlockLocations(String clientMachine, String src,
1522          long offset, long length) throws AccessControlException,
1523          FileNotFoundException, UnresolvedLinkException, IOException {
1524        LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
1525            true);
1526        if (blocks != null) {
1527          blockManager.getDatanodeManager().sortLocatedBlocks(
1528              clientMachine, blocks.getLocatedBlocks());
1529          
1530          LocatedBlock lastBlock = blocks.getLastLocatedBlock();
1531          if (lastBlock != null) {
1532            ArrayList<LocatedBlock> lastBlockList = new ArrayList<LocatedBlock>();
1533            lastBlockList.add(lastBlock);
1534            blockManager.getDatanodeManager().sortLocatedBlocks(
1535                                  clientMachine, lastBlockList);
1536          }
1537        }
1538        return blocks;
1539      }
1540    
1541      /**
1542       * Get block locations within the specified range.
1543       * @see ClientProtocol#getBlockLocations(String, long, long)
1544       * @throws FileNotFoundException, UnresolvedLinkException, IOException
1545       */
1546      LocatedBlocks getBlockLocations(String src, long offset, long length,
1547          boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
1548          throws FileNotFoundException, UnresolvedLinkException, IOException {
1549        try {
1550          return getBlockLocationsInt(src, offset, length, doAccessTime,
1551                                      needBlockToken, checkSafeMode);
1552        } catch (AccessControlException e) {
1553          logAuditEvent(false, "open", src);
1554          throw e;
1555        }
1556      }
1557    
1558      private LocatedBlocks getBlockLocationsInt(String src, long offset,
1559          long length, boolean doAccessTime, boolean needBlockToken,
1560          boolean checkSafeMode)
1561          throws FileNotFoundException, UnresolvedLinkException, IOException {
1562        if (offset < 0) {
1563          throw new HadoopIllegalArgumentException(
1564              "Negative offset is not supported. File: " + src);
1565        }
1566        if (length < 0) {
1567          throw new HadoopIllegalArgumentException(
1568              "Negative length is not supported. File: " + src);
1569        }
1570        final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
1571            offset, length, doAccessTime, needBlockToken);  
1572        logAuditEvent(true, "open", src);
1573        if (checkSafeMode && isInSafeMode()) {
1574          for (LocatedBlock b : ret.getLocatedBlocks()) {
1575            // if safemode & no block locations yet then throw safemodeException
1576            if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
1577              SafeModeException se = new SafeModeException(
1578                  "Zero blocklocations for " + src, safeMode);
1579              if (haEnabled && haContext != null && 
1580                  haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
1581                throw new RetriableException(se);
1582              } else {
1583                throw se;
1584              }
1585            }
1586          }
1587        }
1588        return ret;
1589      }
1590    
1591      /*
1592       * Get block locations within the specified range, updating the
1593       * access times if necessary. 
1594       */
1595      private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset,
1596          long length, boolean doAccessTime, boolean needBlockToken)
1597          throws FileNotFoundException,
1598          UnresolvedLinkException, IOException {
1599        FSPermissionChecker pc = getPermissionChecker();
1600        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1601        for (int attempt = 0; attempt < 2; attempt++) {
1602          boolean isReadOp = (attempt == 0);
1603          if (isReadOp) { // first attempt is with readlock
1604            checkOperation(OperationCategory.READ);
1605            readLock();
1606          }  else { // second attempt is with  write lock
1607            checkOperation(OperationCategory.WRITE);
1608            writeLock(); // writelock is needed to set accesstime
1609          }
1610          src = FSDirectory.resolvePath(src, pathComponents, dir);
1611          try {
1612            if (isReadOp) {
1613              checkOperation(OperationCategory.READ);
1614            } else {
1615              checkOperation(OperationCategory.WRITE);
1616            }
1617            if (isPermissionEnabled) {
1618              checkPathAccess(pc, src, FsAction.READ);
1619            }
1620    
1621            // if the namenode is in safemode, then do not update access time
1622            if (isInSafeMode()) {
1623              doAccessTime = false;
1624            }
1625    
1626            final INodesInPath iip = dir.getLastINodeInPath(src);
1627            final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
1628            if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
1629                && doAccessTime && isAccessTimeSupported()) {
1630              final long now = now();
1631              if (now > inode.getAccessTime() + getAccessTimePrecision()) {
1632                // if we have to set access time but we only have the readlock, then
1633                // restart this entire operation with the writeLock.
1634                if (isReadOp) {
1635                  continue;
1636                }
1637                dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshot());
1638              }
1639            }
1640            final long fileSize = iip.isSnapshot() ?
1641                inode.computeFileSize(iip.getPathSnapshot())
1642                : inode.computeFileSizeNotIncludingLastUcBlock();
1643            boolean isUc = inode.isUnderConstruction();
1644            if (iip.isSnapshot()) {
1645              // if src indicates a snapshot file, we need to make sure the returned
1646              // blocks do not exceed the size of the snapshot file.
1647              length = Math.min(length, fileSize - offset);
1648              isUc = false;
1649            }
1650            LocatedBlocks blocks =
1651              blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
1652                isUc, offset, length, needBlockToken, iip.isSnapshot());
1653            // Set caching information for the located blocks.
1654            for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1655              cacheManager.setCachedLocations(lb);
1656            }
1657            return blocks;
1658          } finally {
1659            if (isReadOp) {
1660              readUnlock();
1661            } else {
1662              writeUnlock();
1663            }
1664          }
1665        }
1666        return null; // can never reach here
1667      }
1668    
1669      /**
1670       * Moves all the blocks from srcs and appends them to trg
1671       * To avoid rollbacks we will verify validitity of ALL of the args
1672       * before we start actual move.
1673       * 
1674       * This does not support ".inodes" relative path
1675       * @param target
1676       * @param srcs
1677       * @throws IOException
1678       */
1679      void concat(String target, String [] srcs) 
1680          throws IOException, UnresolvedLinkException {
1681        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1682        if (cacheEntry != null && cacheEntry.isSuccess()) {
1683          return; // Return previous response
1684        }
1685        
1686        // Either there is no previous request in progres or it has failed
1687        if(FSNamesystem.LOG.isDebugEnabled()) {
1688          FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
1689              " to " + target);
1690        }
1691        
1692        boolean success = false;
1693        try {
1694          concatInt(target, srcs, cacheEntry != null);
1695          success = true;
1696        } catch (AccessControlException e) {
1697          logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
1698          throw e;
1699        } finally {
1700          RetryCache.setState(cacheEntry, success);
1701        }
1702      }
1703    
1704      private void concatInt(String target, String [] srcs, 
1705          boolean logRetryCache) throws IOException, UnresolvedLinkException {
1706        // verify args
1707        if(target.isEmpty()) {
1708          throw new IllegalArgumentException("Target file name is empty");
1709        }
1710        if(srcs == null || srcs.length == 0) {
1711          throw new IllegalArgumentException("No sources given");
1712        }
1713        
1714        // We require all files be in the same directory
1715        String trgParent = 
1716          target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
1717        for (String s : srcs) {
1718          String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
1719          if (!srcParent.equals(trgParent)) {
1720            throw new IllegalArgumentException(
1721               "Sources and target are not in the same directory");
1722          }
1723        }
1724    
1725        HdfsFileStatus resultingStat = null;
1726        FSPermissionChecker pc = getPermissionChecker();
1727        checkOperation(OperationCategory.WRITE);
1728        writeLock();
1729        try {
1730          checkOperation(OperationCategory.WRITE);
1731          checkNameNodeSafeMode("Cannot concat " + target);
1732          concatInternal(pc, target, srcs, logRetryCache);
1733          resultingStat = getAuditFileInfo(target, false);
1734        } finally {
1735          writeUnlock();
1736        }
1737        getEditLog().logSync();
1738        logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
1739      }
1740    
1741      /** See {@link #concat(String, String[])} */
1742      private void concatInternal(FSPermissionChecker pc, String target,
1743          String[] srcs, boolean logRetryCache) throws IOException,
1744          UnresolvedLinkException {
1745        assert hasWriteLock();
1746    
1747        // write permission for the target
1748        if (isPermissionEnabled) {
1749          checkPathAccess(pc, target, FsAction.WRITE);
1750    
1751          // and srcs
1752          for(String aSrc: srcs) {
1753            checkPathAccess(pc, aSrc, FsAction.READ); // read the file
1754            checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete 
1755          }
1756        }
1757    
1758        // to make sure no two files are the same
1759        Set<INode> si = new HashSet<INode>();
1760    
1761        // we put the following prerequisite for the operation
1762        // replication and blocks sizes should be the same for ALL the blocks
1763    
1764        // check the target
1765        final INodeFile trgInode = INodeFile.valueOf(dir.getINode4Write(target),
1766            target);
1767        if(trgInode.isUnderConstruction()) {
1768          throw new HadoopIllegalArgumentException("concat: target file "
1769              + target + " is under construction");
1770        }
1771        // per design target shouldn't be empty and all the blocks same size
1772        if(trgInode.numBlocks() == 0) {
1773          throw new HadoopIllegalArgumentException("concat: target file "
1774              + target + " is empty");
1775        }
1776        if (trgInode instanceof INodeFileWithSnapshot) {
1777          throw new HadoopIllegalArgumentException("concat: target file "
1778              + target + " is in a snapshot");
1779        }
1780    
1781        long blockSize = trgInode.getPreferredBlockSize();
1782    
1783        // check the end block to be full
1784        final BlockInfo last = trgInode.getLastBlock();
1785        if(blockSize != last.getNumBytes()) {
1786          throw new HadoopIllegalArgumentException("The last block in " + target
1787              + " is not full; last block size = " + last.getNumBytes()
1788              + " but file block size = " + blockSize);
1789        }
1790    
1791        si.add(trgInode);
1792        final short repl = trgInode.getFileReplication();
1793    
1794        // now check the srcs
1795        boolean endSrc = false; // final src file doesn't have to have full end block
1796        for(int i=0; i<srcs.length; i++) {
1797          String src = srcs[i];
1798          if(i==srcs.length-1)
1799            endSrc=true;
1800    
1801          final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
1802          if(src.isEmpty() 
1803              || srcInode.isUnderConstruction()
1804              || srcInode.numBlocks() == 0) {
1805            throw new HadoopIllegalArgumentException("concat: source file " + src
1806                + " is invalid or empty or underConstruction");
1807          }
1808    
1809          // check replication and blocks size
1810          if(repl != srcInode.getBlockReplication()) {
1811            throw new HadoopIllegalArgumentException("concat: the soruce file "
1812                + src + " and the target file " + target
1813                + " should have the same replication: source replication is "
1814                + srcInode.getBlockReplication()
1815                + " but target replication is " + repl);
1816          }
1817    
1818          //boolean endBlock=false;
1819          // verify that all the blocks are of the same length as target
1820          // should be enough to check the end blocks
1821          final BlockInfo[] srcBlocks = srcInode.getBlocks();
1822          int idx = srcBlocks.length-1;
1823          if(endSrc)
1824            idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
1825          if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
1826            throw new HadoopIllegalArgumentException("concat: the soruce file "
1827                + src + " and the target file " + target
1828                + " should have the same blocks sizes: target block size is "
1829                + blockSize + " but the size of source block " + idx + " is "
1830                + srcBlocks[idx].getNumBytes());
1831          }
1832    
1833          si.add(srcInode);
1834        }
1835    
1836        // make sure no two files are the same
1837        if(si.size() < srcs.length+1) { // trg + srcs
1838          // it means at least two files are the same
1839          throw new HadoopIllegalArgumentException(
1840              "concat: at least two of the source files are the same");
1841        }
1842    
1843        if(NameNode.stateChangeLog.isDebugEnabled()) {
1844          NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " + 
1845              Arrays.toString(srcs) + " to " + target);
1846        }
1847    
1848        dir.concat(target,srcs, logRetryCache);
1849      }
1850      
1851      /**
1852       * stores the modification and access time for this inode. 
1853       * The access time is precise upto an hour. The transaction, if needed, is
1854       * written to the edits log but is not flushed.
1855       */
1856      void setTimes(String src, long mtime, long atime) 
1857          throws IOException, UnresolvedLinkException {
1858        if (!isAccessTimeSupported() && atime != -1) {
1859          throw new IOException("Access time for hdfs is not configured. " +
1860                                " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
1861        }
1862        try {
1863          setTimesInt(src, mtime, atime);
1864        } catch (AccessControlException e) {
1865          logAuditEvent(false, "setTimes", src);
1866          throw e;
1867        }
1868      }
1869    
1870      private void setTimesInt(String src, long mtime, long atime) 
1871        throws IOException, UnresolvedLinkException {
1872        HdfsFileStatus resultingStat = null;
1873        FSPermissionChecker pc = getPermissionChecker();
1874        checkOperation(OperationCategory.WRITE);
1875        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1876        writeLock();
1877        try {
1878          checkOperation(OperationCategory.WRITE);
1879          checkNameNodeSafeMode("Cannot set times " + src);
1880          src = FSDirectory.resolvePath(src, pathComponents, dir);
1881    
1882          // Write access is required to set access and modification times
1883          if (isPermissionEnabled) {
1884            checkPathAccess(pc, src, FsAction.WRITE);
1885          }
1886          final INodesInPath iip = dir.getINodesInPath4Write(src);
1887          final INode inode = iip.getLastINode();
1888          if (inode != null) {
1889            dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshot());
1890            resultingStat = getAuditFileInfo(src, false);
1891          } else {
1892            throw new FileNotFoundException("File/Directory " + src + " does not exist.");
1893          }
1894        } finally {
1895          writeUnlock();
1896        }
1897        logAuditEvent(true, "setTimes", src, null, resultingStat);
1898      }
1899    
1900      /**
1901       * Create a symbolic link.
1902       */
1903      @SuppressWarnings("deprecation")
1904      void createSymlink(String target, String link,
1905          PermissionStatus dirPerms, boolean createParent) 
1906          throws IOException, UnresolvedLinkException {
1907        if (!FileSystem.areSymlinksEnabled()) {
1908          throw new UnsupportedOperationException("Symlinks not supported");
1909        }
1910        if (!DFSUtil.isValidName(link)) {
1911          throw new InvalidPathException("Invalid link name: " + link);
1912        }
1913        if (FSDirectory.isReservedName(target)) {
1914          throw new InvalidPathException("Invalid target name: " + target);
1915        }
1916        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1917        if (cacheEntry != null && cacheEntry.isSuccess()) {
1918          return; // Return previous response
1919        }
1920        boolean success = false;
1921        try {
1922          createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
1923          success = true;
1924        } catch (AccessControlException e) {
1925          logAuditEvent(false, "createSymlink", link, target, null);
1926          throw e;
1927        } finally {
1928          RetryCache.setState(cacheEntry, success);
1929        }
1930      }
1931    
1932      private void createSymlinkInt(String target, String link,
1933          PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) 
1934          throws IOException, UnresolvedLinkException {
1935        if (NameNode.stateChangeLog.isDebugEnabled()) {
1936          NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
1937              + target + " link=" + link);
1938        }
1939        HdfsFileStatus resultingStat = null;
1940        FSPermissionChecker pc = getPermissionChecker();
1941        checkOperation(OperationCategory.WRITE);
1942        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
1943        writeLock();
1944        try {
1945          checkOperation(OperationCategory.WRITE);
1946          checkNameNodeSafeMode("Cannot create symlink " + link);
1947          link = FSDirectory.resolvePath(link, pathComponents, dir);
1948          if (!createParent) {
1949            verifyParentDir(link);
1950          }
1951          if (!dir.isValidToCreate(link)) {
1952            throw new IOException("failed to create link " + link 
1953                +" either because the filename is invalid or the file exists");
1954          }
1955          if (isPermissionEnabled) {
1956            checkAncestorAccess(pc, link, FsAction.WRITE);
1957          }
1958          // validate that we have enough inodes.
1959          checkFsObjectLimit();
1960    
1961          // add symbolic link to namespace
1962          dir.addSymlink(link, target, dirPerms, createParent, logRetryCache);
1963          resultingStat = getAuditFileInfo(link, false);
1964        } finally {
1965          writeUnlock();
1966        }
1967        getEditLog().logSync();
1968        logAuditEvent(true, "createSymlink", link, target, resultingStat);
1969      }
1970    
1971      /**
1972       * Set replication for an existing file.
1973       * 
1974       * The NameNode sets new replication and schedules either replication of 
1975       * under-replicated data blocks or removal of the excessive block copies 
1976       * if the blocks are over-replicated.
1977       * 
1978       * @see ClientProtocol#setReplication(String, short)
1979       * @param src file name
1980       * @param replication new replication
1981       * @return true if successful; 
1982       *         false if file does not exist or is a directory
1983       */
1984      boolean setReplication(final String src, final short replication)
1985          throws IOException {
1986        try {
1987          return setReplicationInt(src, replication);
1988        } catch (AccessControlException e) {
1989          logAuditEvent(false, "setReplication", src);
1990          throw e;
1991        }
1992      }
1993    
1994      private boolean setReplicationInt(String src, final short replication)
1995          throws IOException {
1996        blockManager.verifyReplication(src, replication, null);
1997        final boolean isFile;
1998        FSPermissionChecker pc = getPermissionChecker();
1999        checkOperation(OperationCategory.WRITE);
2000        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2001        writeLock();
2002        try {
2003          checkOperation(OperationCategory.WRITE);
2004          checkNameNodeSafeMode("Cannot set replication for " + src);
2005          src = FSDirectory.resolvePath(src, pathComponents, dir);
2006          if (isPermissionEnabled) {
2007            checkPathAccess(pc, src, FsAction.WRITE);
2008          }
2009    
2010          final short[] blockRepls = new short[2]; // 0: old, 1: new
2011          final Block[] blocks = dir.setReplication(src, replication, blockRepls);
2012          isFile = blocks != null;
2013          if (isFile) {
2014            blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
2015          }
2016        } finally {
2017          writeUnlock();
2018        }
2019    
2020        getEditLog().logSync();
2021        if (isFile) {
2022          logAuditEvent(true, "setReplication", src);
2023        }
2024        return isFile;
2025      }
2026    
2027      long getPreferredBlockSize(String filename) 
2028          throws IOException, UnresolvedLinkException {
2029        FSPermissionChecker pc = getPermissionChecker();
2030        checkOperation(OperationCategory.READ);
2031        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
2032        readLock();
2033        try {
2034          checkOperation(OperationCategory.READ);
2035          filename = FSDirectory.resolvePath(filename, pathComponents, dir);
2036          if (isPermissionEnabled) {
2037            checkTraverse(pc, filename);
2038          }
2039          return dir.getPreferredBlockSize(filename);
2040        } finally {
2041          readUnlock();
2042        }
2043      }
2044    
2045      /**
2046       * Verify that parent directory of src exists.
2047       */
2048      private void verifyParentDir(String src) throws FileNotFoundException,
2049          ParentNotDirectoryException, UnresolvedLinkException {
2050        assert hasReadLock();
2051        Path parent = new Path(src).getParent();
2052        if (parent != null) {
2053          final INode parentNode = dir.getINode(parent.toString());
2054          if (parentNode == null) {
2055            throw new FileNotFoundException("Parent directory doesn't exist: "
2056                + parent);
2057          } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
2058            throw new ParentNotDirectoryException("Parent path is not a directory: "
2059                + parent);
2060          }
2061        }
2062      }
2063      
2064      /**
2065       * Create a new file entry in the namespace.
2066       * 
2067       * For description of parameters and exceptions thrown see
2068       * {@link ClientProtocol#create()}, except it returns valid file status upon
2069       * success
2070       * 
2071       * For retryCache handling details see -
2072       * {@link #getFileStatus(boolean, CacheEntryWithPayload)}
2073       * 
2074       */
2075      HdfsFileStatus startFile(String src, PermissionStatus permissions,
2076          String holder, String clientMachine, EnumSet<CreateFlag> flag,
2077          boolean createParent, short replication, long blockSize)
2078          throws AccessControlException, SafeModeException,
2079          FileAlreadyExistsException, UnresolvedLinkException,
2080          FileNotFoundException, ParentNotDirectoryException, IOException {
2081        HdfsFileStatus status = null;
2082        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2083            null);
2084        if (cacheEntry != null && cacheEntry.isSuccess()) {
2085          return (HdfsFileStatus) cacheEntry.getPayload();
2086        }
2087        
2088        try {
2089          status = startFileInt(src, permissions, holder, clientMachine, flag,
2090              createParent, replication, blockSize, cacheEntry != null);
2091        } catch (AccessControlException e) {
2092          logAuditEvent(false, "create", src);
2093          throw e;
2094        } finally {
2095          RetryCache.setState(cacheEntry, status != null, status);
2096        }
2097        return status;
2098      }
2099    
2100      private HdfsFileStatus startFileInt(String src, PermissionStatus permissions,
2101          String holder, String clientMachine, EnumSet<CreateFlag> flag,
2102          boolean createParent, short replication, long blockSize,
2103          boolean logRetryCache) throws AccessControlException, SafeModeException,
2104          FileAlreadyExistsException, UnresolvedLinkException,
2105          FileNotFoundException, ParentNotDirectoryException, IOException {
2106        if (NameNode.stateChangeLog.isDebugEnabled()) {
2107          NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src
2108              + ", holder=" + holder
2109              + ", clientMachine=" + clientMachine
2110              + ", createParent=" + createParent
2111              + ", replication=" + replication
2112              + ", createFlag=" + flag.toString());
2113        }
2114        if (!DFSUtil.isValidName(src)) {
2115          throw new InvalidPathException(src);
2116        }
2117        blockManager.verifyReplication(src, replication, clientMachine);
2118    
2119        boolean skipSync = false;
2120        HdfsFileStatus stat = null;
2121        FSPermissionChecker pc = getPermissionChecker();
2122        checkOperation(OperationCategory.WRITE);
2123        if (blockSize < minBlockSize) {
2124          throw new IOException("Specified block size is less than configured" +
2125              " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
2126              + "): " + blockSize + " < " + minBlockSize);
2127        }
2128        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2129        boolean create = flag.contains(CreateFlag.CREATE);
2130        boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
2131        writeLock();
2132        try {
2133          checkOperation(OperationCategory.WRITE);
2134          checkNameNodeSafeMode("Cannot create file" + src);
2135          src = FSDirectory.resolvePath(src, pathComponents, dir);
2136          startFileInternal(pc, src, permissions, holder, clientMachine, create,
2137              overwrite, createParent, replication, blockSize, logRetryCache);
2138          stat = dir.getFileInfo(src, false);
2139        } catch (StandbyException se) {
2140          skipSync = true;
2141          throw se;
2142        } finally {
2143          writeUnlock();
2144          // There might be transactions logged while trying to recover the lease.
2145          // They need to be sync'ed even when an exception was thrown.
2146          if (!skipSync) {
2147            getEditLog().logSync();
2148          }
2149        } 
2150        logAuditEvent(true, "create", src, null, stat);
2151        return stat;
2152      }
2153    
2154      /**
2155       * Create a new file or overwrite an existing file<br>
2156       * 
2157       * Once the file is create the client then allocates a new block with the next
2158       * call using {@link NameNode#addBlock()}.
2159       * <p>
2160       * For description of parameters and exceptions thrown see
2161       * {@link ClientProtocol#create()}
2162       */
2163      private void startFileInternal(FSPermissionChecker pc, String src,
2164          PermissionStatus permissions, String holder, String clientMachine,
2165          boolean create, boolean overwrite, boolean createParent,
2166          short replication, long blockSize, boolean logRetryEntry)
2167          throws FileAlreadyExistsException, AccessControlException,
2168          UnresolvedLinkException, FileNotFoundException,
2169          ParentNotDirectoryException, IOException {
2170        assert hasWriteLock();
2171        // Verify that the destination does not exist as a directory already.
2172        final INodesInPath iip = dir.getINodesInPath4Write(src);
2173        final INode inode = iip.getLastINode();
2174        if (inode != null && inode.isDirectory()) {
2175          throw new FileAlreadyExistsException("Cannot create file " + src
2176              + "; already exists as a directory.");
2177        }
2178        final INodeFile myFile = INodeFile.valueOf(inode, src, true);
2179        if (isPermissionEnabled) {
2180          if (overwrite && myFile != null) {
2181            checkPathAccess(pc, src, FsAction.WRITE);
2182          } else {
2183            checkAncestorAccess(pc, src, FsAction.WRITE);
2184          }
2185        }
2186    
2187        if (!createParent) {
2188          verifyParentDir(src);
2189        }
2190    
2191        try {
2192          if (myFile == null) {
2193            if (!create) {
2194              throw new FileNotFoundException("failed to overwrite non-existent file "
2195                + src + " on client " + clientMachine);
2196            }
2197          } else {
2198            if (overwrite) {
2199              try {
2200                deleteInt(src, true, false); // File exists - delete if overwrite
2201              } catch (AccessControlException e) {
2202                logAuditEvent(false, "delete", src);
2203                throw e;
2204              }
2205            } else {
2206              // If lease soft limit time is expired, recover the lease
2207              recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2208              throw new FileAlreadyExistsException("failed to create file " + src
2209                  + " on client " + clientMachine + " because the file exists");
2210            }
2211          }
2212    
2213          checkFsObjectLimit();
2214          final DatanodeDescriptor clientNode = 
2215              blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2216    
2217          INodeFileUnderConstruction newNode = dir.addFile(src, permissions,
2218              replication, blockSize, holder, clientMachine, clientNode);
2219          if (newNode == null) {
2220            throw new IOException("DIR* NameSystem.startFile: " +
2221                                  "Unable to add file to namespace.");
2222          }
2223          leaseManager.addLease(newNode.getClientName(), src);
2224    
2225          // record file record in log, record new generation stamp
2226          getEditLog().logOpenFile(src, newNode, logRetryEntry);
2227          if (NameNode.stateChangeLog.isDebugEnabled()) {
2228            NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
2229                                       +"add "+src+" to namespace for "+holder);
2230          }
2231        } catch (IOException ie) {
2232          NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: "
2233                                       +ie.getMessage());
2234          throw ie;
2235        }
2236      }
2237      
2238      /**
2239       * Append to an existing file for append.
2240       * <p>
2241       * 
2242       * The method returns the last block of the file if this is a partial block,
2243       * which can still be used for writing more data. The client uses the returned
2244       * block locations to form the data pipeline for this block.<br>
2245       * The method returns null if the last block is full. The client then
2246       * allocates a new block with the next call using {@link NameNode#addBlock()}.
2247       * <p>
2248       * 
2249       * For description of parameters and exceptions thrown see
2250       * {@link ClientProtocol#append(String, String)}
2251       * 
2252       * @return the last block locations if the block is partial or null otherwise
2253       */
2254      private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
2255          String holder, String clientMachine, boolean logRetryCache)
2256          throws AccessControlException, UnresolvedLinkException,
2257          FileNotFoundException, IOException {
2258        assert hasWriteLock();
2259        // Verify that the destination does not exist as a directory already.
2260        final INodesInPath iip = dir.getINodesInPath4Write(src);
2261        final INode inode = iip.getLastINode();
2262        if (inode != null && inode.isDirectory()) {
2263          throw new FileAlreadyExistsException("Cannot append to directory " + src
2264              + "; already exists as a directory.");
2265        }
2266        if (isPermissionEnabled) {
2267          checkPathAccess(pc, src, FsAction.WRITE);
2268        }
2269    
2270        try {
2271          if (inode == null) {
2272            throw new FileNotFoundException("failed to append to non-existent file "
2273              + src + " on client " + clientMachine);
2274          }
2275          INodeFile myFile = INodeFile.valueOf(inode, src, true);
2276          // Opening an existing file for write - may need to recover lease.
2277          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2278          
2279          // recoverLeaseInternal may create a new InodeFile via 
2280          // finalizeINodeFileUnderConstruction so we need to refresh 
2281          // the referenced file.  
2282          myFile = INodeFile.valueOf(dir.getINode(src), src, true);
2283          
2284          final DatanodeDescriptor clientNode = 
2285              blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2286          return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode,
2287              true, iip.getLatestSnapshot(), logRetryCache);
2288        } catch (IOException ie) {
2289          NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
2290          throw ie;
2291        }
2292      }
2293      
2294      /**
2295       * Replace current node with a INodeUnderConstruction.
2296       * Recreate in-memory lease record.
2297       * 
2298       * @param src path to the file
2299       * @param file existing file object
2300       * @param leaseHolder identifier of the lease holder on this file
2301       * @param clientMachine identifier of the client machine
2302       * @param clientNode if the client is collocated with a DN, that DN's descriptor
2303       * @param writeToEditLog whether to persist this change to the edit log
2304       * @param logRetryCache whether to record RPC ids in editlog for retry cache
2305       *                      rebuilding
2306       * @return the last block locations if the block is partial or null otherwise
2307       * @throws UnresolvedLinkException
2308       * @throws IOException
2309       */
2310      LocatedBlock prepareFileForWrite(String src, INodeFile file,
2311          String leaseHolder, String clientMachine, DatanodeDescriptor clientNode,
2312          boolean writeToEditLog, Snapshot latestSnapshot, boolean logRetryCache)
2313          throws IOException {
2314        file = file.recordModification(latestSnapshot, dir.getINodeMap());
2315        final INodeFileUnderConstruction cons = file.toUnderConstruction(
2316            leaseHolder, clientMachine, clientNode);
2317    
2318        dir.replaceINodeFile(src, file, cons);
2319        leaseManager.addLease(cons.getClientName(), src);
2320        
2321        LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
2322        if (writeToEditLog) {
2323          getEditLog().logOpenFile(src, cons, logRetryCache);
2324        }
2325        return ret;
2326      }
2327    
2328      /**
2329       * Recover lease;
2330       * Immediately revoke the lease of the current lease holder and start lease
2331       * recovery so that the file can be forced to be closed.
2332       * 
2333       * @param src the path of the file to start lease recovery
2334       * @param holder the lease holder's name
2335       * @param clientMachine the client machine's name
2336       * @return true if the file is already closed
2337       * @throws IOException
2338       */
2339      boolean recoverLease(String src, String holder, String clientMachine)
2340          throws IOException {
2341        if (!DFSUtil.isValidName(src)) {
2342          throw new IOException("Invalid file name: " + src);
2343        }
2344      
2345        boolean skipSync = false;
2346        FSPermissionChecker pc = getPermissionChecker();
2347        checkOperation(OperationCategory.WRITE);
2348        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2349        writeLock();
2350        try {
2351          checkOperation(OperationCategory.WRITE);
2352          checkNameNodeSafeMode("Cannot recover the lease of " + src);
2353          src = FSDirectory.resolvePath(src, pathComponents, dir);
2354          final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
2355          if (!inode.isUnderConstruction()) {
2356            return true;
2357          }
2358          if (isPermissionEnabled) {
2359            checkPathAccess(pc, src, FsAction.WRITE);
2360          }
2361      
2362          recoverLeaseInternal(inode, src, holder, clientMachine, true);
2363        } catch (StandbyException se) {
2364          skipSync = true;
2365          throw se;
2366        } finally {
2367          writeUnlock();
2368          // There might be transactions logged while trying to recover the lease.
2369          // They need to be sync'ed even when an exception was thrown.
2370          if (!skipSync) {
2371            getEditLog().logSync();
2372          }
2373        }
2374        return false;
2375      }
2376    
2377      private void recoverLeaseInternal(INodeFile fileInode, 
2378          String src, String holder, String clientMachine, boolean force)
2379          throws IOException {
2380        assert hasWriteLock();
2381        if (fileInode != null && fileInode.isUnderConstruction()) {
2382          INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction) fileInode;
2383          //
2384          // If the file is under construction , then it must be in our
2385          // leases. Find the appropriate lease record.
2386          //
2387          Lease lease = leaseManager.getLease(holder);
2388          //
2389          // We found the lease for this file. And surprisingly the original
2390          // holder is trying to recreate this file. This should never occur.
2391          //
2392          if (!force && lease != null) {
2393            Lease leaseFile = leaseManager.getLeaseByPath(src);
2394            if ((leaseFile != null && leaseFile.equals(lease)) ||
2395                lease.getHolder().equals(holder)) { 
2396              throw new AlreadyBeingCreatedException(
2397                "failed to create file " + src + " for " + holder +
2398                " on client " + clientMachine + 
2399                " because current leaseholder is trying to recreate file.");
2400            }
2401          }
2402          //
2403          // Find the original holder.
2404          //
2405          lease = leaseManager.getLease(pendingFile.getClientName());
2406          if (lease == null) {
2407            throw new AlreadyBeingCreatedException(
2408              "failed to create file " + src + " for " + holder +
2409              " on client " + clientMachine + 
2410              " because pendingCreates is non-null but no leases found.");
2411          }
2412          if (force) {
2413            // close now: no need to wait for soft lease expiration and 
2414            // close only the file src
2415            LOG.info("recoverLease: " + lease + ", src=" + src +
2416              " from client " + pendingFile.getClientName());
2417            internalReleaseLease(lease, src, holder);
2418          } else {
2419            assert lease.getHolder().equals(pendingFile.getClientName()) :
2420              "Current lease holder " + lease.getHolder() +
2421              " does not match file creator " + pendingFile.getClientName();
2422            //
2423            // If the original holder has not renewed in the last SOFTLIMIT 
2424            // period, then start lease recovery.
2425            //
2426            if (lease.expiredSoftLimit()) {
2427              LOG.info("startFile: recover " + lease + ", src=" + src + " client "
2428                  + pendingFile.getClientName());
2429              boolean isClosed = internalReleaseLease(lease, src, null);
2430              if(!isClosed)
2431                throw new RecoveryInProgressException(
2432                    "Failed to close file " + src +
2433                    ". Lease recovery is in progress. Try again later.");
2434            } else {
2435              final BlockInfo lastBlock = pendingFile.getLastBlock();
2436              if (lastBlock != null
2437                  && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
2438                throw new RecoveryInProgressException("Recovery in progress, file ["
2439                    + src + "], " + "lease owner [" + lease.getHolder() + "]");
2440              } else {
2441                throw new AlreadyBeingCreatedException("Failed to create file ["
2442                    + src + "] for [" + holder + "] on client [" + clientMachine
2443                    + "], because this file is already being created by ["
2444                    + pendingFile.getClientName() + "] on ["
2445                    + pendingFile.getClientMachine() + "]");
2446              }
2447            }
2448          }
2449        }
2450      }
2451    
2452      /**
2453       * Append to an existing file in the namespace.
2454       */
2455      LocatedBlock appendFile(String src, String holder, String clientMachine)
2456          throws AccessControlException, SafeModeException,
2457          FileAlreadyExistsException, FileNotFoundException,
2458          ParentNotDirectoryException, IOException {
2459        LocatedBlock lb = null;
2460        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2461            null);
2462        if (cacheEntry != null && cacheEntry.isSuccess()) {
2463          return (LocatedBlock) cacheEntry.getPayload();
2464        }
2465          
2466        boolean success = false;
2467        try {
2468          lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
2469          success = true;
2470          return lb;
2471        } catch (AccessControlException e) {
2472          logAuditEvent(false, "append", src);
2473          throw e;
2474        } finally {
2475          RetryCache.setState(cacheEntry, success, lb);
2476        }
2477      }
2478    
2479      private LocatedBlock appendFileInt(String src, String holder,
2480          String clientMachine, boolean logRetryCache)
2481          throws AccessControlException, SafeModeException,
2482          FileAlreadyExistsException, FileNotFoundException,
2483          ParentNotDirectoryException, IOException {
2484        if (NameNode.stateChangeLog.isDebugEnabled()) {
2485          NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
2486              + ", holder=" + holder
2487              + ", clientMachine=" + clientMachine);
2488        }
2489        boolean skipSync = false;
2490        if (!supportAppends) {
2491          throw new UnsupportedOperationException(
2492              "Append is not enabled on this NameNode. Use the " +
2493              DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
2494        }
2495    
2496        LocatedBlock lb = null;
2497        FSPermissionChecker pc = getPermissionChecker();
2498        checkOperation(OperationCategory.WRITE);
2499        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2500        writeLock();
2501        try {
2502          checkOperation(OperationCategory.WRITE);
2503          checkNameNodeSafeMode("Cannot append to file" + src);
2504          src = FSDirectory.resolvePath(src, pathComponents, dir);
2505          lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
2506        } catch (StandbyException se) {
2507          skipSync = true;
2508          throw se;
2509        } finally {
2510          writeUnlock();
2511          // There might be transactions logged while trying to recover the lease.
2512          // They need to be sync'ed even when an exception was thrown.
2513          if (!skipSync) {
2514            getEditLog().logSync();
2515          }
2516        }
2517        if (lb != null) {
2518          if (NameNode.stateChangeLog.isDebugEnabled()) {
2519            NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
2520                +src+" for "+holder+" at "+clientMachine
2521                +" block " + lb.getBlock()
2522                +" block size " + lb.getBlock().getNumBytes());
2523          }
2524        }
2525        logAuditEvent(true, "append", src);
2526        return lb;
2527      }
2528    
2529      ExtendedBlock getExtendedBlock(Block blk) {
2530        return new ExtendedBlock(blockPoolId, blk);
2531      }
2532      
2533      void setBlockPoolId(String bpid) {
2534        blockPoolId = bpid;
2535        blockManager.setBlockPoolId(blockPoolId);
2536      }
2537    
2538      /**
2539       * The client would like to obtain an additional block for the indicated
2540       * filename (which is being written-to).  Return an array that consists
2541       * of the block, plus a set of machines.  The first on this list should
2542       * be where the client writes data.  Subsequent items in the list must
2543       * be provided in the connection to the first datanode.
2544       *
2545       * Make sure the previous blocks have been reported by datanodes and
2546       * are replicated.  Will return an empty 2-elt array if we want the
2547       * client to "try again later".
2548       */
2549      LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
2550          ExtendedBlock previous, Set<Node> excludedNodes, 
2551          List<String> favoredNodes)
2552          throws LeaseExpiredException, NotReplicatedYetException,
2553          QuotaExceededException, SafeModeException, UnresolvedLinkException,
2554          IOException {
2555        long blockSize;
2556        int replication;
2557        DatanodeDescriptor clientNode = null;
2558    
2559        if(NameNode.stateChangeLog.isDebugEnabled()) {
2560          NameNode.stateChangeLog.debug(
2561              "BLOCK* NameSystem.getAdditionalBlock: file "
2562              +src+" for "+clientName);
2563        }
2564    
2565        // Part I. Analyze the state of the file with respect to the input data.
2566        checkOperation(OperationCategory.READ);
2567        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2568        readLock();
2569        try {
2570          checkOperation(OperationCategory.READ);
2571          src = FSDirectory.resolvePath(src, pathComponents, dir);
2572          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
2573          final INode[] inodes = analyzeFileState(
2574              src, fileId, clientName, previous, onRetryBlock).getINodes();
2575          final INodeFileUnderConstruction pendingFile =
2576              (INodeFileUnderConstruction) inodes[inodes.length - 1].asFile();
2577    
2578          if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
2579            // This is a retry. Just return the last block if having locations.
2580            return onRetryBlock[0];
2581          }
2582          if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
2583            throw new IOException("File has reached the limit on maximum number of"
2584                + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
2585                + "): " + pendingFile.getBlocks().length + " >= "
2586                + maxBlocksPerFile);
2587          }
2588          blockSize = pendingFile.getPreferredBlockSize();
2589          clientNode = pendingFile.getClientNode();
2590          replication = pendingFile.getFileReplication();
2591        } finally {
2592          readUnlock();
2593        }
2594    
2595        // choose targets for the new block to be allocated.
2596        final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget( 
2597            src, replication, clientNode, excludedNodes, blockSize, favoredNodes);
2598    
2599        // Part II.
2600        // Allocate a new block, add it to the INode and the BlocksMap. 
2601        Block newBlock = null;
2602        long offset;
2603        checkOperation(OperationCategory.WRITE);
2604        writeLock();
2605        try {
2606          checkOperation(OperationCategory.WRITE);
2607          // Run the full analysis again, since things could have changed
2608          // while chooseTarget() was executing.
2609          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
2610          INodesInPath inodesInPath =
2611              analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
2612          final INode[] inodes = inodesInPath.getINodes();
2613          final INodeFileUnderConstruction pendingFile =
2614              (INodeFileUnderConstruction) inodes[inodes.length - 1].asFile();
2615    
2616          if (onRetryBlock[0] != null) {
2617            if (onRetryBlock[0].getLocations().length > 0) {
2618              // This is a retry. Just return the last block if having locations.
2619              return onRetryBlock[0];
2620            } else {
2621              // add new chosen targets to already allocated block and return
2622              BlockInfo lastBlockInFile = pendingFile.getLastBlock();
2623              ((BlockInfoUnderConstruction) lastBlockInFile)
2624                  .setExpectedLocations(targets);
2625              offset = pendingFile.computeFileSize();
2626              return makeLocatedBlock(lastBlockInFile, targets, offset);
2627            }
2628          }
2629    
2630          // commit the last block and complete it if it has minimum replicas
2631          commitOrCompleteLastBlock(pendingFile,
2632                                    ExtendedBlock.getLocalBlock(previous));
2633    
2634          // allocate new block, record block locations in INode.
2635          newBlock = createNewBlock();
2636          saveAllocatedBlock(src, inodesInPath, newBlock, targets);
2637    
2638          dir.persistNewBlock(src, pendingFile);
2639          offset = pendingFile.computeFileSize();
2640        } finally {
2641          writeUnlock();
2642        }
2643        getEditLog().logSync();
2644    
2645        // Return located block
2646        return makeLocatedBlock(newBlock, targets, offset);
2647      }
2648    
2649      INodesInPath analyzeFileState(String src,
2650                                    long fileId,
2651                                    String clientName,
2652                                    ExtendedBlock previous,
2653                                    LocatedBlock[] onRetryBlock)
2654              throws IOException  {
2655        assert hasReadLock();
2656    
2657        checkBlock(previous);
2658        onRetryBlock[0] = null;
2659        checkOperation(OperationCategory.WRITE);
2660        checkNameNodeSafeMode("Cannot add block to " + src);
2661    
2662        // have we exceeded the configured limit of fs objects.
2663        checkFsObjectLimit();
2664    
2665        Block previousBlock = ExtendedBlock.getLocalBlock(previous);
2666        final INodesInPath iip = dir.getINodesInPath4Write(src);
2667        final INodeFileUnderConstruction pendingFile
2668            = checkLease(src, fileId, clientName, iip.getLastINode());
2669        BlockInfo lastBlockInFile = pendingFile.getLastBlock();
2670        if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
2671          // The block that the client claims is the current last block
2672          // doesn't match up with what we think is the last block. There are
2673          // four possibilities:
2674          // 1) This is the first block allocation of an append() pipeline
2675          //    which started appending exactly at a block boundary.
2676          //    In this case, the client isn't passed the previous block,
2677          //    so it makes the allocateBlock() call with previous=null.
2678          //    We can distinguish this since the last block of the file
2679          //    will be exactly a full block.
2680          // 2) This is a retry from a client that missed the response of a
2681          //    prior getAdditionalBlock() call, perhaps because of a network
2682          //    timeout, or because of an HA failover. In that case, we know
2683          //    by the fact that the client is re-issuing the RPC that it
2684          //    never began to write to the old block. Hence it is safe to
2685          //    to return the existing block.
2686          // 3) This is an entirely bogus request/bug -- we should error out
2687          //    rather than potentially appending a new block with an empty
2688          //    one in the middle, etc
2689          // 4) This is a retry from a client that timed out while
2690          //    the prior getAdditionalBlock() is still being processed,
2691          //    currently working on chooseTarget(). 
2692          //    There are no means to distinguish between the first and 
2693          //    the second attempts in Part I, because the first one hasn't
2694          //    changed the namesystem state yet.
2695          //    We run this analysis again in Part II where case 4 is impossible.
2696    
2697          BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
2698          if (previous == null &&
2699              lastBlockInFile != null &&
2700              lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() &&
2701              lastBlockInFile.isComplete()) {
2702            // Case 1
2703            if (NameNode.stateChangeLog.isDebugEnabled()) {
2704               NameNode.stateChangeLog.debug(
2705                   "BLOCK* NameSystem.allocateBlock: handling block allocation" +
2706                   " writing to a file with a complete previous block: src=" +
2707                   src + " lastBlock=" + lastBlockInFile);
2708            }
2709          } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
2710            if (lastBlockInFile.getNumBytes() != 0) {
2711              throw new IOException(
2712                  "Request looked like a retry to allocate block " +
2713                  lastBlockInFile + " but it already contains " +
2714                  lastBlockInFile.getNumBytes() + " bytes");
2715            }
2716    
2717            // Case 2
2718            // Return the last block.
2719            NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
2720                "caught retry for allocation of a new block in " +
2721                src + ". Returning previously allocated block " + lastBlockInFile);
2722            long offset = pendingFile.computeFileSize();
2723            onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
2724                ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
2725                offset);
2726            return iip;
2727          } else {
2728            // Case 3
2729            throw new IOException("Cannot allocate block in " + src + ": " +
2730                "passed 'previous' block " + previous + " does not match actual " +
2731                "last block in file " + lastBlockInFile);
2732          }
2733        }
2734    
2735        // Check if the penultimate block is minimally replicated
2736        if (!checkFileProgress(pendingFile, false)) {
2737          throw new NotReplicatedYetException("Not replicated yet: " + src);
2738        }
2739        return iip;
2740      }
2741    
2742      LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
2743                                            long offset) throws IOException {
2744        LocatedBlock lBlk = new LocatedBlock(
2745            getExtendedBlock(blk), locs, offset, false);
2746        getBlockManager().setBlockToken(
2747            lBlk, BlockTokenSecretManager.AccessMode.WRITE);
2748        return lBlk;
2749      }
2750    
2751      /** @see NameNode#getAdditionalDatanode(String, ExtendedBlock, DatanodeInfo[], DatanodeInfo[], int, String) */
2752      LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk,
2753          final DatanodeInfo[] existings, final String[] storageIDs,
2754          final Set<Node> excludes,
2755          final int numAdditionalNodes, final String clientName
2756          ) throws IOException {
2757        //check if the feature is enabled
2758        dtpReplaceDatanodeOnFailure.checkEnabled();
2759    
2760        final DatanodeDescriptor clientnode;
2761        final long preferredblocksize;
2762        final List<DatanodeStorageInfo> chosen;
2763        checkOperation(OperationCategory.READ);
2764        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2765        readLock();
2766        try {
2767          checkOperation(OperationCategory.READ);
2768          //check safe mode
2769          checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
2770          src = FSDirectory.resolvePath(src, pathComponents, dir);
2771    
2772          //check lease
2773          final INodeFileUnderConstruction file = checkLease(src, clientName);
2774          clientnode = file.getClientNode();
2775          preferredblocksize = file.getPreferredBlockSize();
2776    
2777          //find datanode storages
2778          final DatanodeManager dm = blockManager.getDatanodeManager();
2779          chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
2780        } finally {
2781          readUnlock();
2782        }
2783    
2784        // choose new datanodes.
2785        final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy(
2786            ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true,
2787                // TODO: get storage type from the file
2788            excludes, preferredblocksize, StorageType.DEFAULT);
2789        final LocatedBlock lb = new LocatedBlock(blk, targets);
2790        blockManager.setBlockToken(lb, AccessMode.COPY);
2791        return lb;
2792      }
2793    
2794      /**
2795       * The client would like to let go of the given block
2796       */
2797      boolean abandonBlock(ExtendedBlock b, String src, String holder)
2798          throws LeaseExpiredException, FileNotFoundException,
2799          UnresolvedLinkException, IOException {
2800        if(NameNode.stateChangeLog.isDebugEnabled()) {
2801          NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
2802              + "of file " + src);
2803        }
2804        checkOperation(OperationCategory.WRITE);
2805        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2806        writeLock();
2807        try {
2808          checkOperation(OperationCategory.WRITE);
2809          checkNameNodeSafeMode("Cannot abandon block " + b + " for fle" + src);
2810          src = FSDirectory.resolvePath(src, pathComponents, dir);
2811    
2812          //
2813          // Remove the block from the pending creates list
2814          //
2815          INodeFileUnderConstruction file = checkLease(src, holder);
2816          boolean removed = dir.removeBlock(src, file,
2817              ExtendedBlock.getLocalBlock(b));
2818          if (!removed) {
2819            return true;
2820          }
2821          if(NameNode.stateChangeLog.isDebugEnabled()) {
2822            NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
2823                                          + b + " is removed from pendingCreates");
2824          }
2825          dir.persistBlocks(src, file, false);
2826        } finally {
2827          writeUnlock();
2828        }
2829        getEditLog().logSync();
2830    
2831        return true;
2832      }
2833      
2834      /** make sure that we still have the lease on this file. */
2835      private INodeFileUnderConstruction checkLease(String src, String holder)
2836          throws LeaseExpiredException, UnresolvedLinkException,
2837          FileNotFoundException {
2838        return checkLease(src, INodeId.GRANDFATHER_INODE_ID, holder,
2839            dir.getINode(src));
2840      }
2841      
2842      private INodeFileUnderConstruction checkLease(String src, long fileId,
2843          String holder, INode inode) throws LeaseExpiredException,
2844          FileNotFoundException {
2845        assert hasReadLock();
2846        if (inode == null || !inode.isFile()) {
2847          Lease lease = leaseManager.getLease(holder);
2848          throw new LeaseExpiredException(
2849              "No lease on " + src + ": File does not exist. "
2850              + (lease != null ? lease.toString()
2851                  : "Holder " + holder + " does not have any open files."));
2852        }
2853        final INodeFile file = inode.asFile();
2854        if (!file.isUnderConstruction()) {
2855          Lease lease = leaseManager.getLease(holder);
2856          throw new LeaseExpiredException(
2857              "No lease on " + src + ": File is not open for writing. "
2858              + (lease != null ? lease.toString()
2859                  : "Holder " + holder + " does not have any open files."));
2860        }
2861        INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)file;
2862        if (holder != null && !pendingFile.getClientName().equals(holder)) {
2863          throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
2864              + pendingFile.getClientName() + " but is accessed by " + holder);
2865        }
2866        INodeId.checkId(fileId, pendingFile);
2867        return pendingFile;
2868      }
2869     
2870      /**
2871       * Complete in-progress write to the given file.
2872       * @return true if successful, false if the client should continue to retry
2873       *         (e.g if not all blocks have reached minimum replication yet)
2874       * @throws IOException on error (eg lease mismatch, file not open, file deleted)
2875       */
2876      boolean completeFile(String src, String holder,
2877                           ExtendedBlock last, long fileId)
2878        throws SafeModeException, UnresolvedLinkException, IOException {
2879        if (NameNode.stateChangeLog.isDebugEnabled()) {
2880          NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
2881              src + " for " + holder);
2882        }
2883        checkBlock(last);
2884        boolean success = false;
2885        checkOperation(OperationCategory.WRITE);
2886        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2887        writeLock();
2888        try {
2889          checkOperation(OperationCategory.WRITE);
2890          checkNameNodeSafeMode("Cannot complete file " + src);
2891          src = FSDirectory.resolvePath(src, pathComponents, dir);
2892          success = completeFileInternal(src, holder,
2893            ExtendedBlock.getLocalBlock(last), fileId);
2894        } finally {
2895          writeUnlock();
2896        }
2897        getEditLog().logSync();
2898        if (success) {
2899          NameNode.stateChangeLog.info("DIR* completeFile: " + src
2900              + " is closed by " + holder);
2901        }
2902        return success;
2903      }
2904    
2905      private boolean completeFileInternal(String src, 
2906          String holder, Block last, long fileId) throws SafeModeException,
2907          UnresolvedLinkException, IOException {
2908        assert hasWriteLock();
2909        final INodesInPath iip = dir.getLastINodeInPath(src);
2910        final INodeFileUnderConstruction pendingFile;
2911        try {
2912          pendingFile = checkLease(src, fileId, holder, iip.getINode(0));
2913        } catch (LeaseExpiredException lee) {
2914          final INode inode = dir.getINode(src);
2915          if (inode != null
2916              && inode.isFile()
2917              && !inode.asFile().isUnderConstruction()) {
2918            // This could be a retry RPC - i.e the client tried to close
2919            // the file, but missed the RPC response. Thus, it is trying
2920            // again to close the file. If the file still exists and
2921            // the client's view of the last block matches the actual
2922            // last block, then we'll treat it as a successful close.
2923            // See HDFS-3031.
2924            final Block realLastBlock = inode.asFile().getLastBlock();
2925            if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
2926              NameNode.stateChangeLog.info("DIR* completeFile: " +
2927                  "request from " + holder + " to complete " + src +
2928                  " which is already closed. But, it appears to be an RPC " +
2929                  "retry. Returning success");
2930              return true;
2931            }
2932          }
2933          throw lee;
2934        }
2935        // Check the state of the penultimate block. It should be completed
2936        // before attempting to complete the last one.
2937        if (!checkFileProgress(pendingFile, false)) {
2938          return false;
2939        }
2940    
2941        // commit the last block and complete it if it has minimum replicas
2942        commitOrCompleteLastBlock(pendingFile, last);
2943    
2944        if (!checkFileProgress(pendingFile, true)) {
2945          return false;
2946        }
2947    
2948        finalizeINodeFileUnderConstruction(src, pendingFile,
2949            iip.getLatestSnapshot());
2950        return true;
2951      }
2952    
2953      /**
2954       * Save allocated block at the given pending filename
2955       * 
2956       * @param src path to the file
2957       * @param inodesInPath representing each of the components of src. 
2958       *                     The last INode is the INode for the file.
2959       * @throws QuotaExceededException If addition of block exceeds space quota
2960       */
2961      BlockInfo saveAllocatedBlock(String src, INodesInPath inodes,
2962          Block newBlock, DatanodeStorageInfo[] targets)
2963              throws IOException {
2964        assert hasWriteLock();
2965        BlockInfo b = dir.addBlock(src, inodes, newBlock, targets);
2966        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". "
2967            + getBlockPoolId() + " " + b);
2968        DatanodeStorageInfo.incrementBlocksScheduled(targets);
2969        return b;
2970      }
2971    
2972      /**
2973       * Create new block with a unique block id and a new generation stamp.
2974       */
2975      Block createNewBlock() throws IOException {
2976        assert hasWriteLock();
2977        Block b = new Block(nextBlockId(), 0, 0);
2978        // Increment the generation stamp for every new block.
2979        b.setGenerationStamp(nextGenerationStamp(false));
2980        return b;
2981      }
2982    
2983      /**
2984       * Check that the indicated file's blocks are present and
2985       * replicated.  If not, return false. If checkall is true, then check
2986       * all blocks, otherwise check only penultimate block.
2987       */
2988      boolean checkFileProgress(INodeFile v, boolean checkall) {
2989        readLock();
2990        try {
2991          if (checkall) {
2992            //
2993            // check all blocks of the file.
2994            //
2995            for (BlockInfo block: v.getBlocks()) {
2996              if (!block.isComplete()) {
2997                LOG.info("BLOCK* checkFileProgress: " + block
2998                    + " has not reached minimal replication "
2999                    + blockManager.minReplication);
3000                return false;
3001              }
3002            }
3003          } else {
3004            //
3005            // check the penultimate block of this file
3006            //
3007            BlockInfo b = v.getPenultimateBlock();
3008            if (b != null && !b.isComplete()) {
3009              LOG.warn("BLOCK* checkFileProgress: " + b
3010                  + " has not reached minimal replication "
3011                  + blockManager.minReplication);
3012              return false;
3013            }
3014          }
3015          return true;
3016        } finally {
3017          readUnlock();
3018        }
3019      }
3020    
3021      ////////////////////////////////////////////////////////////////
3022      // Here's how to handle block-copy failure during client write:
3023      // -- As usual, the client's write should result in a streaming
3024      // backup write to a k-machine sequence.
3025      // -- If one of the backup machines fails, no worries.  Fail silently.
3026      // -- Before client is allowed to close and finalize file, make sure
3027      // that the blocks are backed up.  Namenode may have to issue specific backup
3028      // commands to make up for earlier datanode failures.  Once all copies
3029      // are made, edit namespace and return to client.
3030      ////////////////////////////////////////////////////////////////
3031    
3032      /** 
3033       * Change the indicated filename. 
3034       * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead.
3035       */
3036      @Deprecated
3037      boolean renameTo(String src, String dst) 
3038          throws IOException, UnresolvedLinkException {
3039        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3040        if (cacheEntry != null && cacheEntry.isSuccess()) {
3041          return true; // Return previous response
3042        }
3043        boolean ret = false;
3044        try {
3045          ret = renameToInt(src, dst, cacheEntry != null);
3046        } catch (AccessControlException e) {
3047          logAuditEvent(false, "rename", src, dst, null);
3048          throw e;
3049        } finally {
3050          RetryCache.setState(cacheEntry, ret);
3051        }
3052        return ret;
3053      }
3054    
3055      private boolean renameToInt(String src, String dst, boolean logRetryCache) 
3056        throws IOException, UnresolvedLinkException {
3057        if (NameNode.stateChangeLog.isDebugEnabled()) {
3058          NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
3059              " to " + dst);
3060        }
3061        if (!DFSUtil.isValidName(dst)) {
3062          throw new IOException("Invalid name: " + dst);
3063        }
3064        FSPermissionChecker pc = getPermissionChecker();
3065        checkOperation(OperationCategory.WRITE);
3066        byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
3067        byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
3068        boolean status = false;
3069        HdfsFileStatus resultingStat = null;
3070        writeLock();
3071        try {
3072          checkOperation(OperationCategory.WRITE);
3073          checkNameNodeSafeMode("Cannot rename " + src);
3074          src = FSDirectory.resolvePath(src, srcComponents, dir);
3075          dst = FSDirectory.resolvePath(dst, dstComponents, dir);
3076          checkOperation(OperationCategory.WRITE);
3077          status = renameToInternal(pc, src, dst, logRetryCache);
3078          if (status) {
3079            resultingStat = getAuditFileInfo(dst, false);
3080          }
3081        } finally {
3082          writeUnlock();
3083        }
3084        getEditLog().logSync();
3085        if (status) {
3086          logAuditEvent(true, "rename", src, dst, resultingStat);
3087        }
3088        return status;
3089      }
3090    
3091      /** @deprecated See {@link #renameTo(String, String)} */
3092      @Deprecated
3093      private boolean renameToInternal(FSPermissionChecker pc, String src,
3094          String dst, boolean logRetryCache) throws IOException,
3095          UnresolvedLinkException {
3096        assert hasWriteLock();
3097        if (isPermissionEnabled) {
3098          //We should not be doing this.  This is move() not renameTo().
3099          //but for now,
3100          //NOTE: yes, this is bad!  it's assuming much lower level behavior
3101          //      of rewriting the dst
3102          String actualdst = dir.isDir(dst)?
3103              dst + Path.SEPARATOR + new Path(src).getName(): dst;
3104          // Rename does not operates on link targets
3105          // Do not resolveLink when checking permissions of src and dst
3106          // Check write access to parent of src
3107          checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3108          // Check write access to ancestor of dst
3109          checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
3110              false);
3111        }
3112    
3113        if (dir.renameTo(src, dst, logRetryCache)) {
3114          return true;
3115        }
3116        return false;
3117      }
3118      
3119    
3120      /** Rename src to dst */
3121      void renameTo(String src, String dst, Options.Rename... options)
3122          throws IOException, UnresolvedLinkException {
3123        if (NameNode.stateChangeLog.isDebugEnabled()) {
3124          NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
3125              + src + " to " + dst);
3126        }
3127        if (!DFSUtil.isValidName(dst)) {
3128          throw new InvalidPathException("Invalid name: " + dst);
3129        }
3130        final FSPermissionChecker pc = getPermissionChecker();
3131        
3132        checkOperation(OperationCategory.WRITE);
3133        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3134        if (cacheEntry != null && cacheEntry.isSuccess()) {
3135          return; // Return previous response
3136        }
3137        byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
3138        byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
3139        HdfsFileStatus resultingStat = null;
3140        boolean success = false;
3141        writeLock();
3142        try {
3143          checkOperation(OperationCategory.WRITE);
3144          checkNameNodeSafeMode("Cannot rename " + src);
3145          src = FSDirectory.resolvePath(src, srcComponents, dir);
3146          dst = FSDirectory.resolvePath(dst, dstComponents, dir);
3147          renameToInternal(pc, src, dst, cacheEntry != null, options);
3148          resultingStat = getAuditFileInfo(dst, false);
3149          success = true;
3150        } finally {
3151          writeUnlock();
3152          RetryCache.setState(cacheEntry, success);
3153        }
3154        getEditLog().logSync();
3155        if (resultingStat != null) {
3156          StringBuilder cmd = new StringBuilder("rename options=");
3157          for (Rename option : options) {
3158            cmd.append(option.value()).append(" ");
3159          }
3160          logAuditEvent(true, cmd.toString(), src, dst, resultingStat);
3161        }
3162      }
3163    
3164      private void renameToInternal(FSPermissionChecker pc, String src, String dst,
3165          boolean logRetryCache, Options.Rename... options) throws IOException {
3166        assert hasWriteLock();
3167        if (isPermissionEnabled) {
3168          // Rename does not operates on link targets
3169          // Do not resolveLink when checking permissions of src and dst
3170          // Check write access to parent of src
3171          checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3172          // Check write access to ancestor of dst
3173          checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
3174        }
3175    
3176        dir.renameTo(src, dst, logRetryCache, options);
3177      }
3178      
3179      /**
3180       * Remove the indicated file from namespace.
3181       * 
3182       * @see ClientProtocol#delete(String, boolean) for detailed description and 
3183       * description of exceptions
3184       */
3185      boolean delete(String src, boolean recursive)
3186          throws AccessControlException, SafeModeException,
3187          UnresolvedLinkException, IOException {
3188        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3189        if (cacheEntry != null && cacheEntry.isSuccess()) {
3190          return true; // Return previous response
3191        }
3192        boolean ret = false;
3193        try {
3194          ret = deleteInt(src, recursive, cacheEntry != null);
3195        } catch (AccessControlException e) {
3196          logAuditEvent(false, "delete", src);
3197          throw e;
3198        } finally {
3199          RetryCache.setState(cacheEntry, ret);
3200        }
3201        return ret;
3202      }
3203          
3204      private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
3205          throws AccessControlException, SafeModeException,
3206          UnresolvedLinkException, IOException {
3207        if (NameNode.stateChangeLog.isDebugEnabled()) {
3208          NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
3209        }
3210        boolean status = deleteInternal(src, recursive, true, logRetryCache);
3211        if (status) {
3212          logAuditEvent(true, "delete", src);
3213        }
3214        return status;
3215      }
3216        
3217      private FSPermissionChecker getPermissionChecker()
3218          throws AccessControlException {
3219        try {
3220          return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser());
3221        } catch (IOException ioe) {
3222          throw new AccessControlException(ioe);
3223        }
3224      }
3225      
3226      /**
3227       * Remove a file/directory from the namespace.
3228       * <p>
3229       * For large directories, deletion is incremental. The blocks under
3230       * the directory are collected and deleted a small number at a time holding
3231       * the {@link FSNamesystem} lock.
3232       * <p>
3233       * For small directory or file the deletion is done in one shot.
3234       * 
3235       * @see ClientProtocol#delete(String, boolean) for description of exceptions
3236       */
3237      private boolean deleteInternal(String src, boolean recursive,
3238          boolean enforcePermission, boolean logRetryCache)
3239          throws AccessControlException, SafeModeException, UnresolvedLinkException,
3240                 IOException {
3241        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
3242        List<INode> removedINodes = new ChunkedArrayList<INode>();
3243        FSPermissionChecker pc = getPermissionChecker();
3244        checkOperation(OperationCategory.WRITE);
3245        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3246        boolean ret = false;
3247        writeLock();
3248        try {
3249          checkOperation(OperationCategory.WRITE);
3250          checkNameNodeSafeMode("Cannot delete " + src);
3251          src = FSDirectory.resolvePath(src, pathComponents, dir);
3252          if (!recursive && dir.isNonEmptyDirectory(src)) {
3253            throw new IOException(src + " is non empty");
3254          }
3255          if (enforcePermission && isPermissionEnabled) {
3256            checkPermission(pc, src, false, null, FsAction.WRITE, null,
3257                FsAction.ALL, false);
3258          }
3259          // Unlink the target directory from directory tree
3260          if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
3261            return false;
3262          }
3263          ret = true;
3264        } finally {
3265          writeUnlock();
3266        }
3267        getEditLog().logSync(); 
3268        removeBlocks(collectedBlocks); // Incremental deletion of blocks
3269        collectedBlocks.clear();
3270        dir.writeLock();
3271        try {
3272          dir.removeFromInodeMap(removedINodes);
3273        } finally {
3274          dir.writeUnlock();
3275        }
3276        removedINodes.clear();
3277        if (NameNode.stateChangeLog.isDebugEnabled()) {
3278          NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
3279            + src +" is removed");
3280        }
3281        return ret;
3282      }
3283    
3284      /**
3285       * From the given list, incrementally remove the blocks from blockManager
3286       * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
3287       * ensure that other waiters on the lock can get in. See HDFS-2938
3288       * 
3289       * @param blocks
3290       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
3291       *          of blocks that need to be removed from blocksMap
3292       */
3293      void removeBlocks(BlocksMapUpdateInfo blocks) {
3294        List<Block> toDeleteList = blocks.getToDeleteList();
3295        Iterator<Block> iter = toDeleteList.iterator();
3296        while (iter.hasNext()) {
3297          writeLock();
3298          try {
3299            for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
3300              blockManager.removeBlock(iter.next());
3301            }
3302          } finally {
3303            writeUnlock();
3304          }
3305        }
3306      }
3307      
3308      /**
3309       * Remove leases, inodes and blocks related to a given path
3310       * @param src The given path
3311       * @param blocks Containing the list of blocks to be deleted from blocksMap
3312       * @param removedINodes Containing the list of inodes to be removed from 
3313       *                      inodesMap
3314       */
3315      void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
3316          List<INode> removedINodes) {
3317        assert hasWriteLock();
3318        leaseManager.removeLeaseWithPrefixPath(src);
3319        // remove inodes from inodesMap
3320        if (removedINodes != null) {
3321          dir.removeFromInodeMap(removedINodes);
3322          removedINodes.clear();
3323        }
3324        if (blocks == null) {
3325          return;
3326        }
3327        
3328        removeBlocksAndUpdateSafemodeTotal(blocks);
3329      }
3330    
3331      /**
3332       * Removes the blocks from blocksmap and updates the safemode blocks total
3333       * 
3334       * @param blocks
3335       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
3336       *          of blocks that need to be removed from blocksMap
3337       */
3338      void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
3339        assert hasWriteLock();
3340        // In the case that we are a Standby tailing edits from the
3341        // active while in safe-mode, we need to track the total number
3342        // of blocks and safe blocks in the system.
3343        boolean trackBlockCounts = isSafeModeTrackingBlocks();
3344        int numRemovedComplete = 0, numRemovedSafe = 0;
3345    
3346        for (Block b : blocks.getToDeleteList()) {
3347          if (trackBlockCounts) {
3348            BlockInfo bi = getStoredBlock(b);
3349            if (bi.isComplete()) {
3350              numRemovedComplete++;
3351              if (bi.numNodes() >= blockManager.minReplication) {
3352                numRemovedSafe++;
3353              }
3354            }
3355          }
3356          blockManager.removeBlock(b);
3357        }
3358        if (trackBlockCounts) {
3359          if (LOG.isDebugEnabled()) {
3360            LOG.debug("Adjusting safe-mode totals for deletion."
3361                + "decreasing safeBlocks by " + numRemovedSafe
3362                + ", totalBlocks by " + numRemovedComplete);
3363          }
3364          adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
3365        }
3366      }
3367    
3368      /**
3369       * @see SafeModeInfo#shouldIncrementallyTrackBlocks
3370       */
3371      private boolean isSafeModeTrackingBlocks() {
3372        if (!haEnabled) {
3373          // Never track blocks incrementally in non-HA code.
3374          return false;
3375        }
3376        SafeModeInfo sm = this.safeMode;
3377        return sm != null && sm.shouldIncrementallyTrackBlocks();
3378      }
3379    
3380      /**
3381       * Get the file info for a specific file.
3382       *
3383       * @param src The string representation of the path to the file
3384       * @param resolveLink whether to throw UnresolvedLinkException 
3385       *        if src refers to a symlink
3386       *
3387       * @throws AccessControlException if access is denied
3388       * @throws UnresolvedLinkException if a symlink is encountered.
3389       *
3390       * @return object containing information regarding the file
3391       *         or null if file not found
3392       * @throws StandbyException 
3393       */
3394      HdfsFileStatus getFileInfo(String src, boolean resolveLink) 
3395        throws AccessControlException, UnresolvedLinkException,
3396               StandbyException, IOException {
3397        if (!DFSUtil.isValidName(src)) {
3398          throw new InvalidPathException("Invalid file name: " + src);
3399        }
3400        HdfsFileStatus stat = null;
3401        FSPermissionChecker pc = getPermissionChecker();
3402        checkOperation(OperationCategory.READ);
3403        if (!DFSUtil.isValidName(src)) {
3404          throw new InvalidPathException("Invalid file name: " + src);
3405        }
3406        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3407        readLock();
3408        try {
3409          checkOperation(OperationCategory.READ);
3410          src = FSDirectory.resolvePath(src, pathComponents, dir);
3411          if (isPermissionEnabled) {
3412            checkPermission(pc, src, false, null, null, null, null, resolveLink);
3413          }
3414          stat = dir.getFileInfo(src, resolveLink);
3415        } catch (AccessControlException e) {
3416          logAuditEvent(false, "getfileinfo", src);
3417          throw e;
3418        } finally {
3419          readUnlock();
3420        }
3421        logAuditEvent(true, "getfileinfo", src);
3422        return stat;
3423      }
3424      
3425      /**
3426       * Returns true if the file is closed
3427       */
3428      boolean isFileClosed(String src) 
3429          throws AccessControlException, UnresolvedLinkException,
3430          StandbyException, IOException {
3431        FSPermissionChecker pc = getPermissionChecker();  
3432        checkOperation(OperationCategory.READ);
3433        readLock();
3434        try {
3435          checkOperation(OperationCategory.READ);
3436          if (isPermissionEnabled) {
3437            checkTraverse(pc, src);
3438          }
3439          return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
3440        } catch (AccessControlException e) {
3441          if (isAuditEnabled() && isExternalInvocation()) {
3442            logAuditEvent(false, "isFileClosed", src);
3443          }
3444          throw e;
3445        } finally {
3446          readUnlock();
3447        }
3448      }
3449    
3450      /**
3451       * Create all the necessary directories
3452       */
3453      boolean mkdirs(String src, PermissionStatus permissions,
3454          boolean createParent) throws IOException, UnresolvedLinkException {
3455        boolean ret = false;
3456        try {
3457          ret = mkdirsInt(src, permissions, createParent);
3458        } catch (AccessControlException e) {
3459          logAuditEvent(false, "mkdirs", src);
3460          throw e;
3461        }
3462        return ret;
3463      }
3464    
3465      private boolean mkdirsInt(String src, PermissionStatus permissions,
3466          boolean createParent) throws IOException, UnresolvedLinkException {
3467        if(NameNode.stateChangeLog.isDebugEnabled()) {
3468          NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
3469        }
3470        if (!DFSUtil.isValidName(src)) {
3471          throw new InvalidPathException(src);
3472        }
3473        FSPermissionChecker pc = getPermissionChecker();
3474        checkOperation(OperationCategory.WRITE);
3475        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3476        HdfsFileStatus resultingStat = null;
3477        boolean status = false;
3478        writeLock();
3479        try {
3480          checkOperation(OperationCategory.WRITE);   
3481          checkNameNodeSafeMode("Cannot create directory " + src);
3482          src = FSDirectory.resolvePath(src, pathComponents, dir);
3483          status = mkdirsInternal(pc, src, permissions, createParent);
3484          if (status) {
3485            resultingStat = dir.getFileInfo(src, false);
3486          }
3487        } finally {
3488          writeUnlock();
3489        }
3490        getEditLog().logSync();
3491        if (status) {
3492          logAuditEvent(true, "mkdirs", src, null, resultingStat);
3493        }
3494        return status;
3495      }
3496        
3497      /**
3498       * Create all the necessary directories
3499       */
3500      private boolean mkdirsInternal(FSPermissionChecker pc, String src,
3501          PermissionStatus permissions, boolean createParent) 
3502          throws IOException, UnresolvedLinkException {
3503        assert hasWriteLock();
3504        if (isPermissionEnabled) {
3505          checkTraverse(pc, src);
3506        }
3507        if (dir.isDirMutable(src)) {
3508          // all the users of mkdirs() are used to expect 'true' even if
3509          // a new directory is not created.
3510          return true;
3511        }
3512        if (isPermissionEnabled) {
3513          checkAncestorAccess(pc, src, FsAction.WRITE);
3514        }
3515        if (!createParent) {
3516          verifyParentDir(src);
3517        }
3518    
3519        // validate that we have enough inodes. This is, at best, a 
3520        // heuristic because the mkdirs() operation might need to 
3521        // create multiple inodes.
3522        checkFsObjectLimit();
3523    
3524        if (!dir.mkdirs(src, permissions, false, now())) {
3525          throw new IOException("Failed to create directory: " + src);
3526        }
3527        return true;
3528      }
3529    
3530      /**
3531       * Get the content summary for a specific file/dir.
3532       *
3533       * @param src The string representation of the path to the file
3534       *
3535       * @throws AccessControlException if access is denied
3536       * @throws UnresolvedLinkException if a symlink is encountered.
3537       * @throws FileNotFoundException if no file exists
3538       * @throws StandbyException
3539       * @throws IOException for issues with writing to the audit log
3540       *
3541       * @return object containing information regarding the file
3542       *         or null if file not found
3543       */
3544      ContentSummary getContentSummary(String src) throws IOException {
3545        FSPermissionChecker pc = getPermissionChecker();
3546        checkOperation(OperationCategory.READ);
3547        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3548        readLock();
3549        boolean success = true;
3550        try {
3551          checkOperation(OperationCategory.READ);
3552          src = FSDirectory.resolvePath(src, pathComponents, dir);
3553          if (isPermissionEnabled) {
3554            checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
3555          }
3556          return dir.getContentSummary(src);
3557    
3558        } catch (AccessControlException ace) {
3559          success = false;
3560          throw ace;
3561        } finally {
3562          readUnlock();
3563          logAuditEvent(success, "contentSummary", src);
3564        }
3565      }
3566    
3567      /**
3568       * Set the namespace quota and diskspace quota for a directory.
3569       * See {@link ClientProtocol#setQuota(String, long, long)} for the 
3570       * contract.
3571       * 
3572       * Note: This does not support ".inodes" relative path.
3573       */
3574      void setQuota(String path, long nsQuota, long dsQuota) 
3575          throws IOException, UnresolvedLinkException {
3576        checkSuperuserPrivilege();
3577        checkOperation(OperationCategory.WRITE);
3578        writeLock();
3579        try {
3580          checkOperation(OperationCategory.WRITE);
3581          checkNameNodeSafeMode("Cannot set quota on " + path);
3582          dir.setQuota(path, nsQuota, dsQuota);
3583        } finally {
3584          writeUnlock();
3585        }
3586        getEditLog().logSync();
3587      }
3588    
3589      /** Persist all metadata about this file.
3590       * @param src The string representation of the path
3591       * @param clientName The string representation of the client
3592       * @param lastBlockLength The length of the last block 
3593       *                        under construction reported from client.
3594       * @throws IOException if path does not exist
3595       */
3596      void fsync(String src, String clientName, long lastBlockLength) 
3597          throws IOException, UnresolvedLinkException {
3598        NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
3599        checkOperation(OperationCategory.WRITE);
3600        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3601        writeLock();
3602        try {
3603          checkOperation(OperationCategory.WRITE);
3604          checkNameNodeSafeMode("Cannot fsync file " + src);
3605          src = FSDirectory.resolvePath(src, pathComponents, dir);
3606          INodeFileUnderConstruction pendingFile  = checkLease(src, clientName);
3607          if (lastBlockLength > 0) {
3608            pendingFile.updateLengthOfLastBlock(lastBlockLength);
3609          }
3610          dir.persistBlocks(src, pendingFile, false);
3611        } finally {
3612          writeUnlock();
3613        }
3614        getEditLog().logSync();
3615      }
3616    
3617      /**
3618       * Move a file that is being written to be immutable.
3619       * @param src The filename
3620       * @param lease The lease for the client creating the file
3621       * @param recoveryLeaseHolder reassign lease to this holder if the last block
3622       *        needs recovery; keep current holder if null.
3623       * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
3624       *         replication;<br>
3625       *         RecoveryInProgressException if lease recovery is in progress.<br>
3626       *         IOException in case of an error.
3627       * @return true  if file has been successfully finalized and closed or 
3628       *         false if block recovery has been initiated. Since the lease owner
3629       *         has been changed and logged, caller should call logSync().
3630       */
3631      boolean internalReleaseLease(Lease lease, String src, 
3632          String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 
3633          IOException, UnresolvedLinkException {
3634        LOG.info("Recovering " + lease + ", src=" + src);
3635        assert !isInSafeMode();
3636        assert hasWriteLock();
3637    
3638        final INodesInPath iip = dir.getLastINodeInPath(src);
3639        final INodeFileUnderConstruction pendingFile
3640            = INodeFileUnderConstruction.valueOf(iip.getINode(0), src);
3641        int nrBlocks = pendingFile.numBlocks();
3642        BlockInfo[] blocks = pendingFile.getBlocks();
3643    
3644        int nrCompleteBlocks;
3645        BlockInfo curBlock = null;
3646        for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
3647          curBlock = blocks[nrCompleteBlocks];
3648          if(!curBlock.isComplete())
3649            break;
3650          assert blockManager.checkMinReplication(curBlock) :
3651                  "A COMPLETE block is not minimally replicated in " + src;
3652        }
3653    
3654        // If there are no incomplete blocks associated with this file,
3655        // then reap lease immediately and close the file.
3656        if(nrCompleteBlocks == nrBlocks) {
3657          finalizeINodeFileUnderConstruction(src, pendingFile,
3658              iip.getLatestSnapshot());
3659          NameNode.stateChangeLog.warn("BLOCK*"
3660            + " internalReleaseLease: All existing blocks are COMPLETE,"
3661            + " lease removed, file closed.");
3662          return true;  // closed!
3663        }
3664    
3665        // Only the last and the penultimate blocks may be in non COMPLETE state.
3666        // If the penultimate block is not COMPLETE, then it must be COMMITTED.
3667        if(nrCompleteBlocks < nrBlocks - 2 ||
3668           nrCompleteBlocks == nrBlocks - 2 &&
3669             curBlock != null &&
3670             curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
3671          final String message = "DIR* NameSystem.internalReleaseLease: "
3672            + "attempt to release a create lock on "
3673            + src + " but file is already closed.";
3674          NameNode.stateChangeLog.warn(message);
3675          throw new IOException(message);
3676        }
3677    
3678        // The last block is not COMPLETE, and
3679        // that the penultimate block if exists is either COMPLETE or COMMITTED
3680        final BlockInfo lastBlock = pendingFile.getLastBlock();
3681        BlockUCState lastBlockState = lastBlock.getBlockUCState();
3682        BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
3683        boolean penultimateBlockMinReplication;
3684        BlockUCState penultimateBlockState;
3685        if (penultimateBlock == null) {
3686          penultimateBlockState = BlockUCState.COMPLETE;
3687          // If penultimate block doesn't exist then its minReplication is met
3688          penultimateBlockMinReplication = true;
3689        } else {
3690          penultimateBlockState = BlockUCState.COMMITTED;
3691          penultimateBlockMinReplication = 
3692            blockManager.checkMinReplication(penultimateBlock);
3693        }
3694        assert penultimateBlockState == BlockUCState.COMPLETE ||
3695               penultimateBlockState == BlockUCState.COMMITTED :
3696               "Unexpected state of penultimate block in " + src;
3697    
3698        switch(lastBlockState) {
3699        case COMPLETE:
3700          assert false : "Already checked that the last block is incomplete";
3701          break;
3702        case COMMITTED:
3703          // Close file if committed blocks are minimally replicated
3704          if(penultimateBlockMinReplication &&
3705              blockManager.checkMinReplication(lastBlock)) {
3706            finalizeINodeFileUnderConstruction(src, pendingFile,
3707                iip.getLatestSnapshot());
3708            NameNode.stateChangeLog.warn("BLOCK*"
3709              + " internalReleaseLease: Committed blocks are minimally replicated,"
3710              + " lease removed, file closed.");
3711            return true;  // closed!
3712          }
3713          // Cannot close file right now, since some blocks 
3714          // are not yet minimally replicated.
3715          // This may potentially cause infinite loop in lease recovery
3716          // if there are no valid replicas on data-nodes.
3717          String message = "DIR* NameSystem.internalReleaseLease: " +
3718              "Failed to release lease for file " + src +
3719              ". Committed blocks are waiting to be minimally replicated." +
3720              " Try again later.";
3721          NameNode.stateChangeLog.warn(message);
3722          throw new AlreadyBeingCreatedException(message);
3723        case UNDER_CONSTRUCTION:
3724        case UNDER_RECOVERY:
3725          final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
3726          // setup the last block locations from the blockManager if not known
3727          if (uc.getNumExpectedLocations() == 0) {
3728            uc.setExpectedLocations(blockManager.getStorages(lastBlock));
3729          }
3730    
3731          if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
3732            // There is no datanode reported to this block.
3733            // may be client have crashed before writing data to pipeline.
3734            // This blocks doesn't need any recovery.
3735            // We can remove this block and close the file.
3736            pendingFile.removeLastBlock(lastBlock);
3737            finalizeINodeFileUnderConstruction(src, pendingFile,
3738                iip.getLatestSnapshot());
3739            NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
3740                + "Removed empty last block and closed file.");
3741            return true;
3742          }
3743          // start recovery of the last block for this file
3744          long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
3745          lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
3746          uc.initializeBlockRecovery(blockRecoveryId);
3747          leaseManager.renewLease(lease);
3748          // Cannot close file right now, since the last block requires recovery.
3749          // This may potentially cause infinite loop in lease recovery
3750          // if there are no valid replicas on data-nodes.
3751          NameNode.stateChangeLog.warn(
3752                    "DIR* NameSystem.internalReleaseLease: " +
3753                    "File " + src + " has not been closed." +
3754                   " Lease recovery is in progress. " +
3755                    "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
3756          break;
3757        }
3758        return false;
3759      }
3760    
3761      private Lease reassignLease(Lease lease, String src, String newHolder,
3762          INodeFileUnderConstruction pendingFile) {
3763        assert hasWriteLock();
3764        if(newHolder == null)
3765          return lease;
3766        // The following transaction is not synced. Make sure it's sync'ed later.
3767        logReassignLease(lease.getHolder(), src, newHolder);
3768        return reassignLeaseInternal(lease, src, newHolder, pendingFile);
3769      }
3770      
3771      Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
3772          INodeFileUnderConstruction pendingFile) {
3773        assert hasWriteLock();
3774        pendingFile.setClientName(newHolder);
3775        return leaseManager.reassignLease(lease, src, newHolder);
3776      }
3777    
3778      private void commitOrCompleteLastBlock(final INodeFileUnderConstruction fileINode,
3779          final Block commitBlock) throws IOException {
3780        assert hasWriteLock();
3781        if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
3782          return;
3783        }
3784    
3785        // Adjust disk space consumption if required
3786        final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();    
3787        if (diff > 0) {
3788          try {
3789            String path = leaseManager.findPath(fileINode);
3790            dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
3791          } catch (IOException e) {
3792            LOG.warn("Unexpected exception while updating disk space.", e);
3793          }
3794        }
3795      }
3796    
3797      private void finalizeINodeFileUnderConstruction(String src, 
3798          INodeFileUnderConstruction pendingFile, Snapshot latestSnapshot) 
3799          throws IOException, UnresolvedLinkException {
3800        assert hasWriteLock();
3801        leaseManager.removeLease(pendingFile.getClientName(), src);
3802        
3803        pendingFile = pendingFile.recordModification(latestSnapshot,
3804            dir.getINodeMap());
3805    
3806        // The file is no longer pending.
3807        // Create permanent INode, update blocks
3808        final INodeFile newFile = pendingFile.toINodeFile(now());
3809        dir.replaceINodeFile(src, pendingFile, newFile);
3810    
3811        // close file and persist block allocations for this file
3812        dir.closeFile(src, newFile);
3813    
3814        blockManager.checkReplication(newFile);
3815      }
3816    
3817      @VisibleForTesting
3818      BlockInfo getStoredBlock(Block block) {
3819        return blockManager.getStoredBlock(block);
3820      }
3821      
3822      @Override
3823      public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
3824        assert hasReadLock();
3825        final BlockCollection bc = blockUC.getBlockCollection();
3826        if (bc == null || !(bc instanceof INodeFileUnderConstruction)) {
3827          return false;
3828        }
3829    
3830        INodeFileUnderConstruction inodeUC = (INodeFileUnderConstruction) blockUC
3831            .getBlockCollection();
3832        String fullName = inodeUC.getName();
3833        try {
3834          if (fullName != null && fullName.startsWith(Path.SEPARATOR)
3835              && dir.getINode(fullName) == inodeUC) {
3836            // If file exists in normal path then no need to look in snapshot
3837            return false;
3838          }
3839        } catch (UnresolvedLinkException e) {
3840          LOG.error("Error while resolving the link : " + fullName, e);
3841          return false;
3842        }
3843        /*
3844         * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
3845         * bc is not in the current fsdirectory tree, bc must represent a snapshot
3846         * file. 
3847         * 2. if fullName is not an absolute path, bc cannot be existent in the 
3848         * current fsdirectory tree. 
3849         * 3. if bc is not the current node associated with fullName, bc must be a
3850         * snapshot inode.
3851         */
3852        return true;
3853      }
3854    
3855      void commitBlockSynchronization(ExtendedBlock lastblock,
3856          long newgenerationstamp, long newlength,
3857          boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
3858          String[] newtargetstorages)
3859          throws IOException, UnresolvedLinkException {
3860        LOG.info("commitBlockSynchronization(lastblock=" + lastblock
3861                 + ", newgenerationstamp=" + newgenerationstamp
3862                 + ", newlength=" + newlength
3863                 + ", newtargets=" + Arrays.asList(newtargets)
3864                 + ", closeFile=" + closeFile
3865                 + ", deleteBlock=" + deleteblock
3866                 + ")");
3867        checkOperation(OperationCategory.WRITE);
3868        String src = "";
3869        writeLock();
3870        try {
3871          checkOperation(OperationCategory.WRITE);
3872          // If a DN tries to commit to the standby, the recovery will
3873          // fail, and the next retry will succeed on the new NN.
3874      
3875          checkNameNodeSafeMode(
3876              "Cannot commitBlockSynchronization while in safe mode");
3877          final BlockInfo storedBlock = getStoredBlock(
3878              ExtendedBlock.getLocalBlock(lastblock));
3879          if (storedBlock == null) {
3880            if (deleteblock) {
3881              // This may be a retry attempt so ignore the failure
3882              // to locate the block.
3883              if (LOG.isDebugEnabled()) {
3884                LOG.debug("Block (=" + lastblock + ") not found");
3885              }
3886              return;
3887            } else {
3888              throw new IOException("Block (=" + lastblock + ") not found");
3889            }
3890          }
3891          INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
3892          if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
3893            if (LOG.isDebugEnabled()) {
3894              LOG.debug("Unexpected block (=" + lastblock
3895                        + ") since the file (=" + iFile.getLocalName()
3896                        + ") is not under construction");
3897            }
3898            return;
3899          }
3900    
3901          long recoveryId =
3902            ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
3903          if(recoveryId != newgenerationstamp) {
3904            throw new IOException("The recovery id " + newgenerationstamp
3905                                  + " does not match current recovery id "
3906                                  + recoveryId + " for block " + lastblock); 
3907          }
3908    
3909          INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)iFile;
3910    
3911          if (deleteblock) {
3912            Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
3913            boolean remove = pendingFile.removeLastBlock(blockToDel);
3914            if (remove) {
3915              blockManager.removeBlockFromMap(storedBlock);
3916            }
3917          }
3918          else {
3919            // update last block
3920            storedBlock.setGenerationStamp(newgenerationstamp);
3921            storedBlock.setNumBytes(newlength);
3922    
3923            // find the DatanodeDescriptor objects
3924            // There should be no locations in the blockManager till now because the
3925            // file is underConstruction
3926            ArrayList<DatanodeDescriptor> trimmedTargets =
3927                new ArrayList<DatanodeDescriptor>(newtargets.length);
3928            ArrayList<String> trimmedStorages =
3929                new ArrayList<String>(newtargets.length);
3930            if (newtargets.length > 0) {
3931              for (int i = 0; i < newtargets.length; ++i) {
3932                // try to get targetNode
3933                DatanodeDescriptor targetNode =
3934                    blockManager.getDatanodeManager().getDatanode(newtargets[i]);
3935                if (targetNode != null) {
3936                  trimmedTargets.add(targetNode);
3937                  trimmedStorages.add(newtargetstorages[i]);
3938                } else if (LOG.isDebugEnabled()) {
3939                  LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
3940                }
3941              }
3942            }
3943            if ((closeFile) && !trimmedTargets.isEmpty()) {
3944              // the file is getting closed. Insert block locations into blockManager.
3945              // Otherwise fsck will report these blocks as MISSING, especially if the
3946              // blocksReceived from Datanodes take a long time to arrive.
3947              for (int i = 0; i < trimmedTargets.size(); i++) {
3948                trimmedTargets.get(i).addBlock(
3949                  trimmedStorages.get(i), storedBlock);
3950              }
3951            }
3952    
3953            // add pipeline locations into the INodeUnderConstruction
3954            DatanodeStorageInfo[] trimmedStorageInfos =
3955                blockManager.getDatanodeManager().getDatanodeStorageInfos(
3956                    trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
3957                    trimmedStorages.toArray(new String[trimmedStorages.size()]));
3958            pendingFile.setLastBlock(storedBlock, trimmedStorageInfos);
3959          }
3960    
3961          if (closeFile) {
3962            src = closeFileCommitBlocks(pendingFile, storedBlock);
3963          } else {
3964            // If this commit does not want to close the file, persist blocks
3965            src = persistBlocks(pendingFile, false);
3966          }
3967        } finally {
3968          writeUnlock();
3969        }
3970        getEditLog().logSync();
3971        if (closeFile) {
3972          LOG.info("commitBlockSynchronization(newblock=" + lastblock
3973              + ", file=" + src
3974              + ", newgenerationstamp=" + newgenerationstamp
3975              + ", newlength=" + newlength
3976              + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
3977        } else {
3978          LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
3979        }
3980      }
3981    
3982      /**
3983       *
3984       * @param pendingFile
3985       * @param storedBlock
3986       * @return Path of the file that was closed.
3987       * @throws IOException
3988       */
3989      @VisibleForTesting
3990      String closeFileCommitBlocks(INodeFileUnderConstruction pendingFile,
3991                                           BlockInfo storedBlock)
3992          throws IOException {
3993    
3994        String src = leaseManager.findPath(pendingFile);
3995    
3996        // commit the last block and complete it if it has minimum replicas
3997        commitOrCompleteLastBlock(pendingFile, storedBlock);
3998    
3999        //remove lease, close file
4000        finalizeINodeFileUnderConstruction(src, pendingFile,
4001                                           Snapshot.findLatestSnapshot(pendingFile, null));
4002    
4003        return src;
4004      }
4005    
4006      /**
4007       * Persist the block list for the given file.
4008       *
4009       * @param pendingFile
4010       * @return Path to the given file.
4011       * @throws IOException
4012       */
4013      @VisibleForTesting
4014      String persistBlocks(INodeFileUnderConstruction pendingFile,
4015          boolean logRetryCache) throws IOException {
4016        String src = leaseManager.findPath(pendingFile);
4017        dir.persistBlocks(src, pendingFile, logRetryCache);
4018        return src;
4019      }
4020    
4021      /**
4022       * Renew the lease(s) held by the given client
4023       */
4024      void renewLease(String holder) throws IOException {
4025        checkOperation(OperationCategory.WRITE);
4026        readLock();
4027        try {
4028          checkOperation(OperationCategory.WRITE);
4029          checkNameNodeSafeMode("Cannot renew lease for " + holder);
4030          leaseManager.renewLease(holder);
4031        } finally {
4032          readUnlock();
4033        }
4034      }
4035    
4036      /**
4037       * Get a partial listing of the indicated directory
4038       *
4039       * @param src the directory name
4040       * @param startAfter the name to start after
4041       * @param needLocation if blockLocations need to be returned
4042       * @return a partial listing starting after startAfter
4043       * 
4044       * @throws AccessControlException if access is denied
4045       * @throws UnresolvedLinkException if symbolic link is encountered
4046       * @throws IOException if other I/O error occurred
4047       */
4048      DirectoryListing getListing(String src, byte[] startAfter,
4049          boolean needLocation) 
4050          throws AccessControlException, UnresolvedLinkException, IOException {
4051        try {
4052          return getListingInt(src, startAfter, needLocation);
4053        } catch (AccessControlException e) {
4054          logAuditEvent(false, "listStatus", src);
4055          throw e;
4056        }
4057      }
4058    
4059      private DirectoryListing getListingInt(String src, byte[] startAfter,
4060          boolean needLocation) 
4061        throws AccessControlException, UnresolvedLinkException, IOException {
4062        DirectoryListing dl;
4063        FSPermissionChecker pc = getPermissionChecker();
4064        checkOperation(OperationCategory.READ);
4065        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4066        String startAfterString = new String(startAfter);
4067        readLock();
4068        try {
4069          checkOperation(OperationCategory.READ);
4070          src = FSDirectory.resolvePath(src, pathComponents, dir);
4071    
4072          // Get file name when startAfter is an INodePath
4073          if (FSDirectory.isReservedName(startAfterString)) {
4074            byte[][] startAfterComponents = FSDirectory
4075                .getPathComponentsForReservedPath(startAfterString);
4076            try {
4077              String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
4078              byte[][] regularPath = INode.getPathComponents(tmp);
4079              startAfter = regularPath[regularPath.length - 1];
4080            } catch (IOException e) {
4081              // Possibly the inode is deleted
4082              throw new DirectoryListingStartAfterNotFoundException(
4083                  "Can't find startAfter " + startAfterString);
4084            }
4085          }
4086          
4087          if (isPermissionEnabled) {
4088            if (dir.isDir(src)) {
4089              checkPathAccess(pc, src, FsAction.READ_EXECUTE);
4090            } else {
4091              checkTraverse(pc, src);
4092            }
4093          }
4094          logAuditEvent(true, "listStatus", src);
4095          dl = dir.getListing(src, startAfter, needLocation);
4096        } finally {
4097          readUnlock();
4098        }
4099        return dl;
4100      }
4101    
4102      /////////////////////////////////////////////////////////
4103      //
4104      // These methods are called by datanodes
4105      //
4106      /////////////////////////////////////////////////////////
4107      /**
4108       * Register Datanode.
4109       * <p>
4110       * The purpose of registration is to identify whether the new datanode
4111       * serves a new data storage, and will report new data block copies,
4112       * which the namenode was not aware of; or the datanode is a replacement
4113       * node for the data storage that was previously served by a different
4114       * or the same (in terms of host:port) datanode.
4115       * The data storages are distinguished by their storageIDs. When a new
4116       * data storage is reported the namenode issues a new unique storageID.
4117       * <p>
4118       * Finally, the namenode returns its namespaceID as the registrationID
4119       * for the datanodes. 
4120       * namespaceID is a persistent attribute of the name space.
4121       * The registrationID is checked every time the datanode is communicating
4122       * with the namenode. 
4123       * Datanodes with inappropriate registrationID are rejected.
4124       * If the namenode stops, and then restarts it can restore its 
4125       * namespaceID and will continue serving the datanodes that has previously
4126       * registered with the namenode without restarting the whole cluster.
4127       * 
4128       * @see org.apache.hadoop.hdfs.server.datanode.DataNode
4129       */
4130      void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
4131        writeLock();
4132        try {
4133          getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
4134          checkSafeMode();
4135        } finally {
4136          writeUnlock();
4137        }
4138      }
4139      
4140      /**
4141       * Get registrationID for datanodes based on the namespaceID.
4142       * 
4143       * @see #registerDatanode(DatanodeRegistration)
4144       * @return registration ID
4145       */
4146      String getRegistrationID() {
4147        return Storage.getRegistrationID(dir.fsImage.getStorage());
4148      }
4149    
4150      /**
4151       * The given node has reported in.  This method should:
4152       * 1) Record the heartbeat, so the datanode isn't timed out
4153       * 2) Adjust usage stats for future block allocation
4154       * 
4155       * If a substantial amount of time passed since the last datanode 
4156       * heartbeat then request an immediate block report.  
4157       * 
4158       * @return an array of datanode commands 
4159       * @throws IOException
4160       */
4161      HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
4162          StorageReport[] reports, long cacheCapacity, long cacheUsed,
4163          int xceiverCount, int xmitsInProgress, int failedVolumes)
4164            throws IOException {
4165        readLock();
4166        try {
4167          final int maxTransfer = blockManager.getMaxReplicationStreams()
4168              - xmitsInProgress;
4169          DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
4170              nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
4171              xceiverCount, maxTransfer, failedVolumes);
4172          return new HeartbeatResponse(cmds, createHaStatusHeartbeat());
4173        } finally {
4174          readUnlock();
4175        }
4176      }
4177    
4178      private NNHAStatusHeartbeat createHaStatusHeartbeat() {
4179        HAState state = haContext.getState();
4180        return new NNHAStatusHeartbeat(state.getServiceState(),
4181            getFSImage().getLastAppliedOrWrittenTxId());
4182      }
4183    
4184      /**
4185       * Returns whether or not there were available resources at the last check of
4186       * resources.
4187       *
4188       * @return true if there were sufficient resources available, false otherwise.
4189       */
4190      boolean nameNodeHasResourcesAvailable() {
4191        return hasResourcesAvailable;
4192      }
4193    
4194      /**
4195       * Perform resource checks and cache the results.
4196       * @throws IOException
4197       */
4198      void checkAvailableResources() {
4199        Preconditions.checkState(nnResourceChecker != null,
4200            "nnResourceChecker not initialized");
4201        hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
4202      }
4203    
4204      /**
4205       * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
4206       * there are found to be insufficient resources available, causes the NN to
4207       * enter safe mode. If resources are later found to have returned to
4208       * acceptable levels, this daemon will cause the NN to exit safe mode.
4209       */
4210      class NameNodeResourceMonitor implements Runnable  {
4211        boolean shouldNNRmRun = true;
4212        @Override
4213        public void run () {
4214          try {
4215            while (fsRunning && shouldNNRmRun) {
4216              checkAvailableResources();
4217              if(!nameNodeHasResourcesAvailable()) {
4218                String lowResourcesMsg = "NameNode low on available disk space. ";
4219                if (!isInSafeMode()) {
4220                  FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
4221                } else {
4222                  FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
4223                }
4224                enterSafeMode(true);
4225              }
4226              try {
4227                Thread.sleep(resourceRecheckInterval);
4228              } catch (InterruptedException ie) {
4229                // Deliberately ignore
4230              }
4231            }
4232          } catch (Exception e) {
4233            FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
4234          }
4235        }
4236    
4237        public void stopMonitor() {
4238          shouldNNRmRun = false;
4239        }
4240     }
4241    
4242      class NameNodeEditLogRoller implements Runnable {
4243    
4244        private boolean shouldRun = true;
4245        private final long rollThreshold;
4246        private final long sleepIntervalMs;
4247    
4248        public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
4249            this.rollThreshold = rollThreshold;
4250            this.sleepIntervalMs = sleepIntervalMs;
4251        }
4252    
4253        @Override
4254        public void run() {
4255          while (fsRunning && shouldRun) {
4256            try {
4257              FSEditLog editLog = getFSImage().getEditLog();
4258              long numEdits =
4259                  editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
4260              if (numEdits > rollThreshold) {
4261                FSNamesystem.LOG.info("NameNode rolling its own edit log because"
4262                    + " number of edits in open segment exceeds threshold of "
4263                    + rollThreshold);
4264                rollEditLog();
4265              }
4266              Thread.sleep(sleepIntervalMs);
4267            } catch (InterruptedException e) {
4268              FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
4269                  + " was interrupted, exiting");
4270              break;
4271            } catch (Exception e) {
4272              FSNamesystem.LOG.error("Swallowing exception in "
4273                  + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
4274            }
4275          }
4276        }
4277    
4278        public void stop() {
4279          shouldRun = false;
4280        }
4281      }
4282    
4283      public FSImage getFSImage() {
4284        return dir.fsImage;
4285      }
4286    
4287      public FSEditLog getEditLog() {
4288        return getFSImage().getEditLog();
4289      }    
4290    
4291      private void checkBlock(ExtendedBlock block) throws IOException {
4292        if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
4293          throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
4294              + " - expected " + blockPoolId);
4295        }
4296      }
4297    
4298      @Metric({"MissingBlocks", "Number of missing blocks"})
4299      public long getMissingBlocksCount() {
4300        // not locking
4301        return blockManager.getMissingBlocksCount();
4302      }
4303      
4304      @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
4305      public int getExpiredHeartbeats() {
4306        return datanodeStatistics.getExpiredHeartbeats();
4307      }
4308      
4309      @Metric({"TransactionsSinceLastCheckpoint",
4310          "Number of transactions since last checkpoint"})
4311      public long getTransactionsSinceLastCheckpoint() {
4312        return getEditLog().getLastWrittenTxId() -
4313            getFSImage().getStorage().getMostRecentCheckpointTxId();
4314      }
4315      
4316      @Metric({"TransactionsSinceLastLogRoll",
4317          "Number of transactions since last edit log roll"})
4318      public long getTransactionsSinceLastLogRoll() {
4319        if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
4320          return 0;
4321        } else {
4322          return getEditLog().getLastWrittenTxId() -
4323            getEditLog().getCurSegmentTxId() + 1;
4324        }
4325      }
4326      
4327      @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
4328      public long getLastWrittenTransactionId() {
4329        return getEditLog().getLastWrittenTxId();
4330      }
4331      
4332      @Metric({"LastCheckpointTime",
4333          "Time in milliseconds since the epoch of the last checkpoint"})
4334      public long getLastCheckpointTime() {
4335        return getFSImage().getStorage().getMostRecentCheckpointTime();
4336      }
4337    
4338      /** @see ClientProtocol#getStats() */
4339      long[] getStats() {
4340        final long[] stats = datanodeStatistics.getStats();
4341        stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
4342        stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
4343        stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
4344        return stats;
4345      }
4346    
4347      @Override // FSNamesystemMBean
4348      @Metric({"CapacityTotal",
4349          "Total raw capacity of data nodes in bytes"})
4350      public long getCapacityTotal() {
4351        return datanodeStatistics.getCapacityTotal();
4352      }
4353    
4354      @Metric({"CapacityTotalGB",
4355          "Total raw capacity of data nodes in GB"})
4356      public float getCapacityTotalGB() {
4357        return DFSUtil.roundBytesToGB(getCapacityTotal());
4358      }
4359    
4360      @Override // FSNamesystemMBean
4361      @Metric({"CapacityUsed",
4362          "Total used capacity across all data nodes in bytes"})
4363      public long getCapacityUsed() {
4364        return datanodeStatistics.getCapacityUsed();
4365      }
4366    
4367      @Metric({"CapacityUsedGB",
4368          "Total used capacity across all data nodes in GB"})
4369      public float getCapacityUsedGB() {
4370        return DFSUtil.roundBytesToGB(getCapacityUsed());
4371      }
4372    
4373      @Override // FSNamesystemMBean
4374      @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
4375      public long getCapacityRemaining() {
4376        return datanodeStatistics.getCapacityRemaining();
4377      }
4378    
4379      @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
4380      public float getCapacityRemainingGB() {
4381        return DFSUtil.roundBytesToGB(getCapacityRemaining());
4382      }
4383    
4384      @Metric({"CapacityUsedNonDFS",
4385          "Total space used by data nodes for non DFS purposes in bytes"})
4386      public long getCapacityUsedNonDFS() {
4387        return datanodeStatistics.getCapacityUsedNonDFS();
4388      }
4389    
4390      /**
4391       * Total number of connections.
4392       */
4393      @Override // FSNamesystemMBean
4394      @Metric
4395      public int getTotalLoad() {
4396        return datanodeStatistics.getXceiverCount();
4397      }
4398      
4399      @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
4400      public int getNumSnapshottableDirs() {
4401        return this.snapshotManager.getNumSnapshottableDirs();
4402      }
4403    
4404      @Metric({ "Snapshots", "The number of snapshots" })
4405      public int getNumSnapshots() {
4406        return this.snapshotManager.getNumSnapshots();
4407      }
4408    
4409      @Override
4410      public String getSnapshotStats() {
4411        Map<String, Object> info = new HashMap<String, Object>();
4412        info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
4413        info.put("Snapshots", this.getNumSnapshots());
4414        return JSON.toString(info);
4415      }
4416    
4417      int getNumberOfDatanodes(DatanodeReportType type) {
4418        readLock();
4419        try {
4420          return getBlockManager().getDatanodeManager().getDatanodeListForReport(
4421              type).size(); 
4422        } finally {
4423          readUnlock();
4424        }
4425      }
4426    
4427      DatanodeInfo[] datanodeReport(final DatanodeReportType type
4428          ) throws AccessControlException, StandbyException {
4429        checkSuperuserPrivilege();
4430        checkOperation(OperationCategory.UNCHECKED);
4431        readLock();
4432        try {
4433          checkOperation(OperationCategory.UNCHECKED);
4434          final DatanodeManager dm = getBlockManager().getDatanodeManager();      
4435          final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);
4436    
4437          DatanodeInfo[] arr = new DatanodeInfo[results.size()];
4438          for (int i=0; i<arr.length; i++) {
4439            arr[i] = new DatanodeInfo(results.get(i));
4440          }
4441          return arr;
4442        } finally {
4443          readUnlock();
4444        }
4445      }
4446    
4447      /**
4448       * Save namespace image.
4449       * This will save current namespace into fsimage file and empty edits file.
4450       * Requires superuser privilege and safe mode.
4451       * 
4452       * @throws AccessControlException if superuser privilege is violated.
4453       * @throws IOException if 
4454       */
4455      void saveNamespace() throws AccessControlException, IOException {
4456        checkOperation(OperationCategory.UNCHECKED);
4457        checkSuperuserPrivilege();
4458        
4459        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
4460        if (cacheEntry != null && cacheEntry.isSuccess()) {
4461          return; // Return previous response
4462        }
4463        boolean success = false;
4464        readLock();
4465        try {
4466          checkOperation(OperationCategory.UNCHECKED);
4467          if (!isInSafeMode()) {
4468            throw new IOException("Safe mode should be turned ON "
4469                + "in order to create namespace image.");
4470          }
4471          getFSImage().saveNamespace(this);
4472          success = true;
4473        } finally {
4474          readUnlock();
4475          RetryCache.setState(cacheEntry, success);
4476        }
4477        LOG.info("New namespace image has been created");
4478      }
4479      
4480      /**
4481       * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
4482       * Requires superuser privilege.
4483       * 
4484       * @throws AccessControlException if superuser privilege is violated.
4485       */
4486      boolean restoreFailedStorage(String arg) throws AccessControlException,
4487          StandbyException {
4488        checkSuperuserPrivilege();
4489        checkOperation(OperationCategory.UNCHECKED);
4490        writeLock();
4491        try {
4492          checkOperation(OperationCategory.UNCHECKED);
4493          
4494          // if it is disabled - enable it and vice versa.
4495          if(arg.equals("check"))
4496            return getFSImage().getStorage().getRestoreFailedStorage();
4497          
4498          boolean val = arg.equals("true");  // false if not
4499          getFSImage().getStorage().setRestoreFailedStorage(val);
4500          
4501          return val;
4502        } finally {
4503          writeUnlock();
4504        }
4505      }
4506    
4507      Date getStartTime() {
4508        return new Date(startTime); 
4509      }
4510        
4511      void finalizeUpgrade() throws IOException {
4512        checkSuperuserPrivilege();
4513        checkOperation(OperationCategory.WRITE);
4514        writeLock();
4515        try {
4516          checkOperation(OperationCategory.WRITE);
4517          getFSImage().finalizeUpgrade();
4518        } finally {
4519          writeUnlock();
4520        }
4521      }
4522    
4523      void refreshNodes() throws IOException {
4524        checkOperation(OperationCategory.UNCHECKED);
4525        checkSuperuserPrivilege();
4526        getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
4527      }
4528    
4529      void setBalancerBandwidth(long bandwidth) throws IOException {
4530        checkOperation(OperationCategory.UNCHECKED);
4531        checkSuperuserPrivilege();
4532        getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
4533      }
4534    
4535      /**
4536       * SafeModeInfo contains information related to the safe mode.
4537       * <p>
4538       * An instance of {@link SafeModeInfo} is created when the name node
4539       * enters safe mode.
4540       * <p>
4541       * During name node startup {@link SafeModeInfo} counts the number of
4542       * <em>safe blocks</em>, those that have at least the minimal number of
4543       * replicas, and calculates the ratio of safe blocks to the total number
4544       * of blocks in the system, which is the size of blocks in
4545       * {@link FSNamesystem#blockManager}. When the ratio reaches the
4546       * {@link #threshold} it starts the SafeModeMonitor daemon in order
4547       * to monitor whether the safe mode {@link #extension} is passed.
4548       * Then it leaves safe mode and destroys itself.
4549       * <p>
4550       * If safe mode is turned on manually then the number of safe blocks is
4551       * not tracked because the name node is not intended to leave safe mode
4552       * automatically in the case.
4553       *
4554       * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
4555       */
4556      public class SafeModeInfo {
4557        // configuration fields
4558        /** Safe mode threshold condition %.*/
4559        private double threshold;
4560        /** Safe mode minimum number of datanodes alive */
4561        private int datanodeThreshold;
4562        /** Safe mode extension after the threshold. */
4563        private int extension;
4564        /** Min replication required by safe mode. */
4565        private int safeReplication;
4566        /** threshold for populating needed replication queues */
4567        private double replQueueThreshold;
4568          
4569        // internal fields
4570        /** Time when threshold was reached.
4571         * <br> -1 safe mode is off
4572         * <br> 0 safe mode is on, and threshold is not reached yet
4573         * <br> >0 safe mode is on, but we are in extension period 
4574         */
4575        private long reached = -1;  
4576        /** Total number of blocks. */
4577        int blockTotal; 
4578        /** Number of safe blocks. */
4579        int blockSafe;
4580        /** Number of blocks needed to satisfy safe mode threshold condition */
4581        private int blockThreshold;
4582        /** Number of blocks needed before populating replication queues */
4583        private int blockReplQueueThreshold;
4584        /** time of the last status printout */
4585        private long lastStatusReport = 0;
4586        /** flag indicating whether replication queues have been initialized */
4587        boolean initializedReplQueues = false;
4588        /** Was safemode entered automatically because available resources were low. */
4589        private boolean resourcesLow = false;
4590        /** Should safemode adjust its block totals as blocks come in */
4591        private boolean shouldIncrementallyTrackBlocks = false;
4592        /** counter for tracking startup progress of reported blocks */
4593        private Counter awaitingReportedBlocksCounter;
4594        
4595        /**
4596         * Creates SafeModeInfo when the name node enters
4597         * automatic safe mode at startup.
4598         *  
4599         * @param conf configuration
4600         */
4601        private SafeModeInfo(Configuration conf) {
4602          this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
4603              DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
4604          if(threshold > 1.0) {
4605            LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
4606          }
4607          this.datanodeThreshold = conf.getInt(
4608            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
4609            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
4610          this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
4611          this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 
4612                                             DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
4613          
4614          LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
4615          LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
4616          LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + "     = " + extension);
4617    
4618          // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
4619          this.replQueueThreshold = 
4620            conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
4621                          (float) threshold);
4622          this.blockTotal = 0; 
4623          this.blockSafe = 0;
4624        }
4625    
4626        /**
4627         * In the HA case, the StandbyNode can be in safemode while the namespace
4628         * is modified by the edit log tailer. In this case, the number of total
4629         * blocks changes as edits are processed (eg blocks are added and deleted).
4630         * However, we don't want to do the incremental tracking during the
4631         * startup-time loading process -- only once the initial total has been
4632         * set after the image has been loaded.
4633         */
4634        private boolean shouldIncrementallyTrackBlocks() {
4635          return shouldIncrementallyTrackBlocks;
4636        }
4637    
4638        /**
4639         * Creates SafeModeInfo when safe mode is entered manually, or because
4640         * available resources are low.
4641         *
4642         * The {@link #threshold} is set to 1.5 so that it could never be reached.
4643         * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
4644         * 
4645         * @see SafeModeInfo
4646         */
4647        private SafeModeInfo(boolean resourcesLow, boolean isReplQueuesInited) {
4648          this.threshold = 1.5f;  // this threshold can never be reached
4649          this.datanodeThreshold = Integer.MAX_VALUE;
4650          this.extension = Integer.MAX_VALUE;
4651          this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
4652          this.replQueueThreshold = 1.5f; // can never be reached
4653          this.blockTotal = -1;
4654          this.blockSafe = -1;
4655          this.resourcesLow = resourcesLow;
4656          this.initializedReplQueues = isReplQueuesInited;
4657          enter();
4658          reportStatus("STATE* Safe mode is ON.", true);
4659        }
4660          
4661        /**
4662         * Check if safe mode is on.
4663         * @return true if in safe mode
4664         */
4665        private synchronized boolean isOn() {
4666          doConsistencyCheck();
4667          return this.reached >= 0;
4668        }
4669          
4670        /**
4671         * Check if we are populating replication queues.
4672         */
4673        private synchronized boolean isPopulatingReplQueues() {
4674          return initializedReplQueues;
4675        }
4676    
4677        /**
4678         * Enter safe mode.
4679         */
4680        private void enter() {
4681          this.reached = 0;
4682        }
4683          
4684        /**
4685         * Leave safe mode.
4686         * <p>
4687         * Check for invalid, under- & over-replicated blocks in the end of startup.
4688         */
4689        private synchronized void leave() {
4690          // if not done yet, initialize replication queues.
4691          // In the standby, do not populate repl queues
4692          if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
4693            initializeReplQueues();
4694          }
4695          long timeInSafemode = now() - startTime;
4696          NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 
4697                                        + timeInSafemode/1000 + " secs");
4698          NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
4699    
4700          //Log the following only once (when transitioning from ON -> OFF)
4701          if (reached >= 0) {
4702            NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 
4703          }
4704          reached = -1;
4705          safeMode = null;
4706          final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
4707          NameNode.stateChangeLog.info("STATE* Network topology has "
4708              + nt.getNumOfRacks() + " racks and "
4709              + nt.getNumOfLeaves() + " datanodes");
4710          NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
4711              + blockManager.numOfUnderReplicatedBlocks() + " blocks");
4712    
4713          startSecretManagerIfNecessary();
4714    
4715          // If startup has not yet completed, end safemode phase.
4716          StartupProgress prog = NameNode.getStartupProgress();
4717          if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4718            prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
4719            prog.endPhase(Phase.SAFEMODE);
4720          }
4721        }
4722    
4723        /**
4724         * Initialize replication queues.
4725         */
4726        private synchronized void initializeReplQueues() {
4727          LOG.info("initializing replication queues");
4728          assert !isPopulatingReplQueues() : "Already initialized repl queues";
4729          long startTimeMisReplicatedScan = now();
4730          blockManager.processMisReplicatedBlocks();
4731          initializedReplQueues = true;
4732          NameNode.stateChangeLog.info("STATE* Replication Queue initialization "
4733              + "scan for invalid, over- and under-replicated blocks "
4734              + "completed in " + (now() - startTimeMisReplicatedScan)
4735              + " msec");
4736        }
4737    
4738        /**
4739         * Check whether we have reached the threshold for 
4740         * initializing replication queues.
4741         */
4742        private synchronized boolean canInitializeReplQueues() {
4743          return shouldPopulateReplQueues()
4744              && blockSafe >= blockReplQueueThreshold;
4745        }
4746          
4747        /** 
4748         * Safe mode can be turned off iff 
4749         * the threshold is reached and 
4750         * the extension time have passed.
4751         * @return true if can leave or false otherwise.
4752         */
4753        private synchronized boolean canLeave() {
4754          if (reached == 0)
4755            return false;
4756          if (now() - reached < extension) {
4757            reportStatus("STATE* Safe mode ON.", false);
4758            return false;
4759          }
4760          return !needEnter();
4761        }
4762          
4763        /** 
4764         * There is no need to enter safe mode 
4765         * if DFS is empty or {@link #threshold} == 0
4766         */
4767        private boolean needEnter() {
4768          return (threshold != 0 && blockSafe < blockThreshold) ||
4769            (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
4770            (!nameNodeHasResourcesAvailable());
4771        }
4772          
4773        /**
4774         * Check and trigger safe mode if needed. 
4775         */
4776        private void checkMode() {
4777          // Have to have write-lock since leaving safemode initializes
4778          // repl queues, which requires write lock
4779          assert hasWriteLock();
4780          // if smmthread is already running, the block threshold must have been 
4781          // reached before, there is no need to enter the safe mode again
4782          if (smmthread == null && needEnter()) {
4783            enter();
4784            // check if we are ready to initialize replication queues
4785            if (canInitializeReplQueues() && !isPopulatingReplQueues()) {
4786              initializeReplQueues();
4787            }
4788            reportStatus("STATE* Safe mode ON.", false);
4789            return;
4790          }
4791          // the threshold is reached or was reached before
4792          if (!isOn() ||                           // safe mode is off
4793              extension <= 0 || threshold <= 0) {  // don't need to wait
4794            this.leave(); // leave safe mode
4795            return;
4796          }
4797          if (reached > 0) {  // threshold has already been reached before
4798            reportStatus("STATE* Safe mode ON.", false);
4799            return;
4800          }
4801          // start monitor
4802          reached = now();
4803          if (smmthread == null) {
4804            smmthread = new Daemon(new SafeModeMonitor());
4805            smmthread.start();
4806            reportStatus("STATE* Safe mode extension entered.", true);
4807          }
4808    
4809          // check if we are ready to initialize replication queues
4810          if (canInitializeReplQueues() && !isPopulatingReplQueues()) {
4811            initializeReplQueues();
4812          }
4813        }
4814          
4815        /**
4816         * Set total number of blocks.
4817         */
4818        private synchronized void setBlockTotal(int total) {
4819          this.blockTotal = total;
4820          this.blockThreshold = (int) (blockTotal * threshold);
4821          this.blockReplQueueThreshold = 
4822            (int) (blockTotal * replQueueThreshold);
4823          if (haEnabled) {
4824            // After we initialize the block count, any further namespace
4825            // modifications done while in safe mode need to keep track
4826            // of the number of total blocks in the system.
4827            this.shouldIncrementallyTrackBlocks = true;
4828          }
4829          if(blockSafe < 0)
4830            this.blockSafe = 0;
4831          checkMode();
4832        }
4833          
4834        /**
4835         * Increment number of safe blocks if current block has 
4836         * reached minimal replication.
4837         * @param replication current replication 
4838         */
4839        private synchronized void incrementSafeBlockCount(short replication) {
4840          if (replication == safeReplication) {
4841            this.blockSafe++;
4842    
4843            // Report startup progress only if we haven't completed startup yet.
4844            StartupProgress prog = NameNode.getStartupProgress();
4845            if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4846              if (this.awaitingReportedBlocksCounter == null) {
4847                this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
4848                  STEP_AWAITING_REPORTED_BLOCKS);
4849              }
4850              this.awaitingReportedBlocksCounter.increment();
4851            }
4852    
4853            checkMode();
4854          }
4855        }
4856          
4857        /**
4858         * Decrement number of safe blocks if current block has 
4859         * fallen below minimal replication.
4860         * @param replication current replication 
4861         */
4862        private synchronized void decrementSafeBlockCount(short replication) {
4863          if (replication == safeReplication-1) {
4864            this.blockSafe--;
4865            //blockSafe is set to -1 in manual / low resources safemode
4866            assert blockSafe >= 0 || isManual() || areResourcesLow();
4867            checkMode();
4868          }
4869        }
4870    
4871        /**
4872         * Check if safe mode was entered manually
4873         */
4874        private boolean isManual() {
4875          return extension == Integer.MAX_VALUE;
4876        }
4877    
4878        /**
4879         * Set manual safe mode.
4880         */
4881        private synchronized void setManual() {
4882          extension = Integer.MAX_VALUE;
4883        }
4884    
4885        /**
4886         * Check if safe mode was entered due to resources being low.
4887         */
4888        private boolean areResourcesLow() {
4889          return resourcesLow;
4890        }
4891    
4892        /**
4893         * Set that resources are low for this instance of safe mode.
4894         */
4895        private void setResourcesLow() {
4896          resourcesLow = true;
4897        }
4898    
4899        /**
4900         * A tip on how safe mode is to be turned off: manually or automatically.
4901         */
4902        String getTurnOffTip() {
4903          if(!isOn())
4904            return "Safe mode is OFF.";
4905    
4906          //Manual OR low-resource safemode. (Admin intervention required)
4907          String leaveMsg = "It was turned on manually. ";
4908          if (areResourcesLow()) {
4909            leaveMsg = "Resources are low on NN. Please add or free up more "
4910              + "resources then turn off safe mode manually. NOTE:  If you turn off"
4911              + " safe mode before adding resources, "
4912              + "the NN will immediately return to safe mode. ";
4913          }
4914          if (isManual() || areResourcesLow()) {
4915            return leaveMsg
4916              + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
4917          }
4918    
4919          //Automatic safemode. System will come out of safemode automatically.
4920          leaveMsg = "Safe mode will be turned off automatically";
4921          int numLive = getNumLiveDataNodes();
4922          String msg = "";
4923          if (reached == 0) {
4924            if (blockSafe < blockThreshold) {
4925              msg += String.format(
4926                "The reported blocks %d needs additional %d"
4927                + " blocks to reach the threshold %.4f of total blocks %d.\n",
4928                blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
4929            }
4930            if (numLive < datanodeThreshold) {
4931              msg += String.format(
4932                "The number of live datanodes %d needs an additional %d live "
4933                + "datanodes to reach the minimum number %d.\n",
4934                numLive, (datanodeThreshold - numLive), datanodeThreshold);
4935            }
4936          } else {
4937            msg = String.format("The reported blocks %d has reached the threshold"
4938                + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
4939    
4940            msg += String.format("The number of live datanodes %d has reached "
4941                                   + "the minimum number %d. ",
4942                                   numLive, datanodeThreshold);
4943          }
4944          msg += leaveMsg;
4945          // threshold is not reached or manual or resources low
4946          if(reached == 0 || (isManual() && !areResourcesLow())) {
4947            return msg;
4948          }
4949          // extension period is in progress
4950          return msg + (reached + extension - now() > 0 ?
4951            " in " + (reached + extension - now()) / 1000 + " seconds."
4952            : " soon.");
4953        }
4954    
4955        /**
4956         * Print status every 20 seconds.
4957         */
4958        private void reportStatus(String msg, boolean rightNow) {
4959          long curTime = now();
4960          if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
4961            return;
4962          NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
4963          lastStatusReport = curTime;
4964        }
4965    
4966        @Override
4967        public String toString() {
4968          String resText = "Current safe blocks = " 
4969            + blockSafe 
4970            + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
4971            + ". Minimal replication = " + safeReplication + ".";
4972          if (reached > 0) 
4973            resText += " Threshold was reached " + new Date(reached) + ".";
4974          return resText;
4975        }
4976          
4977        /**
4978         * Checks consistency of the class state.
4979         * This is costly so only runs if asserts are enabled.
4980         */
4981        private void doConsistencyCheck() {
4982          boolean assertsOn = false;
4983          assert assertsOn = true; // set to true if asserts are on
4984          if (!assertsOn) return;
4985          
4986          if (blockTotal == -1 && blockSafe == -1) {
4987            return; // manual safe mode
4988          }
4989          int activeBlocks = blockManager.getActiveBlockCount();
4990          if ((blockTotal != activeBlocks) &&
4991              !(blockSafe >= 0 && blockSafe <= blockTotal)) {
4992            throw new AssertionError(
4993                " SafeMode: Inconsistent filesystem state: "
4994            + "SafeMode data: blockTotal=" + blockTotal
4995            + " blockSafe=" + blockSafe + "; "
4996            + "BlockManager data: active="  + activeBlocks);
4997          }
4998        }
4999    
5000        private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
5001          if (!shouldIncrementallyTrackBlocks) {
5002            return;
5003          }
5004          assert haEnabled;
5005          
5006          if (LOG.isDebugEnabled()) {
5007            LOG.debug("Adjusting block totals from " +
5008                blockSafe + "/" + blockTotal + " to " +
5009                (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
5010          }
5011          assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
5012            blockSafe + " by " + deltaSafe + ": would be negative";
5013          assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
5014            blockTotal + " by " + deltaTotal + ": would be negative";
5015          
5016          blockSafe += deltaSafe;
5017          setBlockTotal(blockTotal + deltaTotal);
5018        }
5019      }
5020        
5021      /**
5022       * Periodically check whether it is time to leave safe mode.
5023       * This thread starts when the threshold level is reached.
5024       *
5025       */
5026      class SafeModeMonitor implements Runnable {
5027        /** interval in msec for checking safe mode: {@value} */
5028        private static final long recheckInterval = 1000;
5029          
5030        /**
5031         */
5032        @Override
5033        public void run() {
5034          while (fsRunning) {
5035            writeLock();
5036            try {
5037              if (safeMode == null) { // Not in safe mode.
5038                break;
5039              }
5040              if (safeMode.canLeave()) {
5041                // Leave safe mode.
5042                safeMode.leave();
5043                smmthread = null;
5044                break;
5045              }
5046            } finally {
5047              writeUnlock();
5048            }
5049    
5050            try {
5051              Thread.sleep(recheckInterval);
5052            } catch (InterruptedException ie) {
5053              // Ignored
5054            }
5055          }
5056          if (!fsRunning) {
5057            LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
5058          }
5059        }
5060      }
5061        
5062      boolean setSafeMode(SafeModeAction action) throws IOException {
5063        if (action != SafeModeAction.SAFEMODE_GET) {
5064          checkSuperuserPrivilege();
5065          switch(action) {
5066          case SAFEMODE_LEAVE: // leave safe mode
5067            leaveSafeMode();
5068            break;
5069          case SAFEMODE_ENTER: // enter safe mode
5070            enterSafeMode(false);
5071            break;
5072          default:
5073            LOG.error("Unexpected safe mode action");
5074          }
5075        }
5076        return isInSafeMode();
5077      }
5078    
5079      @Override
5080      public void checkSafeMode() {
5081        // safeMode is volatile, and may be set to null at any time
5082        SafeModeInfo safeMode = this.safeMode;
5083        if (safeMode != null) {
5084          safeMode.checkMode();
5085        }
5086      }
5087    
5088      @Override
5089      public boolean isInSafeMode() {
5090        // safeMode is volatile, and may be set to null at any time
5091        SafeModeInfo safeMode = this.safeMode;
5092        if (safeMode == null)
5093          return false;
5094        return safeMode.isOn();
5095      }
5096    
5097      @Override
5098      public boolean isInStartupSafeMode() {
5099        // safeMode is volatile, and may be set to null at any time
5100        SafeModeInfo safeMode = this.safeMode;
5101        if (safeMode == null)
5102          return false;
5103        // If the NN is in safemode, and not due to manual / low resources, we
5104        // assume it must be because of startup. If the NN had low resources during
5105        // startup, we assume it came out of startup safemode and it is now in low
5106        // resources safemode
5107        return !safeMode.isManual() && !safeMode.areResourcesLow()
5108          && safeMode.isOn();
5109      }
5110    
5111      /**
5112       * Check if replication queues are to be populated
5113       * @return true when node is HAState.Active and not in the very first safemode
5114       */
5115      @Override
5116      public boolean isPopulatingReplQueues() {
5117        if (!shouldPopulateReplQueues()) {
5118          return false;
5119        }
5120        // safeMode is volatile, and may be set to null at any time
5121        SafeModeInfo safeMode = this.safeMode;
5122        if (safeMode == null)
5123          return true;
5124        return safeMode.isPopulatingReplQueues();
5125      }
5126    
5127      private boolean shouldPopulateReplQueues() {
5128        if(haContext == null || haContext.getState() == null)
5129          return false;
5130        return haContext.getState().shouldPopulateReplQueues();
5131      }
5132    
5133      @Override
5134      public void incrementSafeBlockCount(int replication) {
5135        // safeMode is volatile, and may be set to null at any time
5136        SafeModeInfo safeMode = this.safeMode;
5137        if (safeMode == null)
5138          return;
5139        safeMode.incrementSafeBlockCount((short)replication);
5140      }
5141    
5142      @Override
5143      public void decrementSafeBlockCount(Block b) {
5144        // safeMode is volatile, and may be set to null at any time
5145        SafeModeInfo safeMode = this.safeMode;
5146        if (safeMode == null) // mostly true
5147          return;
5148        BlockInfo storedBlock = getStoredBlock(b);
5149        if (storedBlock.isComplete()) {
5150          safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
5151        }
5152      }
5153      
5154      /**
5155       * Adjust the total number of blocks safe and expected during safe mode.
5156       * If safe mode is not currently on, this is a no-op.
5157       * @param deltaSafe the change in number of safe blocks
5158       * @param deltaTotal the change i nnumber of total blocks expected
5159       */
5160      @Override
5161      public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) {
5162        // safeMode is volatile, and may be set to null at any time
5163        SafeModeInfo safeMode = this.safeMode;
5164        if (safeMode == null)
5165          return;
5166        safeMode.adjustBlockTotals(deltaSafe, deltaTotal);
5167      }
5168    
5169      /**
5170       * Set the total number of blocks in the system. 
5171       */
5172      public void setBlockTotal() {
5173        // safeMode is volatile, and may be set to null at any time
5174        SafeModeInfo safeMode = this.safeMode;
5175        if (safeMode == null)
5176          return;
5177        safeMode.setBlockTotal((int)getCompleteBlocksTotal());
5178      }
5179    
5180      /**
5181       * Get the total number of blocks in the system. 
5182       */
5183      @Override // FSNamesystemMBean
5184      @Metric
5185      public long getBlocksTotal() {
5186        return blockManager.getTotalBlocks();
5187      }
5188    
5189      /**
5190       * Get the total number of COMPLETE blocks in the system.
5191       * For safe mode only complete blocks are counted.
5192       */
5193      private long getCompleteBlocksTotal() {
5194        // Calculate number of blocks under construction
5195        long numUCBlocks = 0;
5196        readLock();
5197        try {
5198          for (Lease lease : leaseManager.getSortedLeases()) {
5199            for (String path : lease.getPaths()) {
5200              final INodeFileUnderConstruction cons;
5201              try {
5202                cons = INodeFileUnderConstruction.valueOf(dir.getINode(path), path);
5203              } catch (UnresolvedLinkException e) {
5204                throw new AssertionError("Lease files should reside on this FS");
5205              } catch (IOException e) {
5206                throw new RuntimeException(e);
5207              }
5208              BlockInfo[] blocks = cons.getBlocks();
5209              if(blocks == null)
5210                continue;
5211              for(BlockInfo b : blocks) {
5212                if(!b.isComplete())
5213                  numUCBlocks++;
5214              }
5215            }
5216          }
5217          LOG.info("Number of blocks under construction: " + numUCBlocks);
5218          return getBlocksTotal() - numUCBlocks;
5219        } finally {
5220          readUnlock();
5221        }
5222      }
5223    
5224      /**
5225       * Enter safe mode. If resourcesLow is false, then we assume it is manual
5226       * @throws IOException
5227       */
5228      void enterSafeMode(boolean resourcesLow) throws IOException {
5229        writeLock();
5230        try {
5231          // Stop the secret manager, since rolling the master key would
5232          // try to write to the edit log
5233          stopSecretManager();
5234    
5235          // Ensure that any concurrent operations have been fully synced
5236          // before entering safe mode. This ensures that the FSImage
5237          // is entirely stable on disk as soon as we're in safe mode.
5238          boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
5239          // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
5240          // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode
5241          if (isEditlogOpenForWrite) {
5242            getEditLog().logSyncAll();
5243          }
5244          if (!isInSafeMode()) {
5245            safeMode = new SafeModeInfo(resourcesLow, isPopulatingReplQueues());
5246            return;
5247          }
5248          if (resourcesLow) {
5249            safeMode.setResourcesLow();
5250          } else {
5251            safeMode.setManual();
5252          }
5253          if (isEditlogOpenForWrite) {
5254            getEditLog().logSyncAll();
5255          }
5256          NameNode.stateChangeLog.info("STATE* Safe mode is ON"
5257              + safeMode.getTurnOffTip());
5258        } finally {
5259          writeUnlock();
5260        }
5261      }
5262    
5263      /**
5264       * Leave safe mode.
5265       * @throws IOException
5266       */
5267      void leaveSafeMode() {
5268        writeLock();
5269        try {
5270          if (!isInSafeMode()) {
5271            NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 
5272            return;
5273          }
5274          safeMode.leave();
5275        } finally {
5276          writeUnlock();
5277        }
5278      }
5279        
5280      String getSafeModeTip() {
5281        readLock();
5282        try {
5283          if (!isInSafeMode()) {
5284            return "";
5285          }
5286          return safeMode.getTurnOffTip();
5287        } finally {
5288          readUnlock();
5289        }
5290      }
5291    
5292      CheckpointSignature rollEditLog() throws IOException {
5293        checkSuperuserPrivilege();
5294        checkOperation(OperationCategory.JOURNAL);
5295        writeLock();
5296        try {
5297          checkOperation(OperationCategory.JOURNAL);
5298          checkNameNodeSafeMode("Log not rolled");
5299          if (Server.isRpcInvocation()) {
5300            LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
5301          }
5302          return getFSImage().rollEditLog();
5303        } finally {
5304          writeUnlock();
5305        }
5306      }
5307    
5308      NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
5309          NamenodeRegistration activeNamenode) throws IOException {
5310        checkOperation(OperationCategory.CHECKPOINT);
5311        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
5312            null);
5313        if (cacheEntry != null && cacheEntry.isSuccess()) {
5314          return (NamenodeCommand) cacheEntry.getPayload();
5315        }
5316        writeLock();
5317        NamenodeCommand cmd = null;
5318        try {
5319          checkOperation(OperationCategory.CHECKPOINT);
5320    
5321          checkNameNodeSafeMode("Checkpoint not started");
5322          LOG.info("Start checkpoint for " + backupNode.getAddress());
5323          cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
5324          getEditLog().logSync();
5325          return cmd;
5326        } finally {
5327          writeUnlock();
5328          RetryCache.setState(cacheEntry, cmd != null, cmd);
5329        }
5330      }
5331    
5332      public void processIncrementalBlockReport(final DatanodeID nodeID,
5333          final String poolId, final StorageReceivedDeletedBlocks srdb)
5334          throws IOException {
5335        writeLock();
5336        try {
5337          blockManager.processIncrementalBlockReport(nodeID, poolId, srdb);
5338        } finally {
5339          writeUnlock();
5340        }
5341      }
5342      
5343      void endCheckpoint(NamenodeRegistration registration,
5344                                CheckpointSignature sig) throws IOException {
5345        checkOperation(OperationCategory.CHECKPOINT);
5346        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
5347        if (cacheEntry != null && cacheEntry.isSuccess()) {
5348          return; // Return previous response
5349        }
5350        boolean success = false;
5351        readLock();
5352        try {
5353          checkOperation(OperationCategory.CHECKPOINT);
5354    
5355          checkNameNodeSafeMode("Checkpoint not ended");
5356          LOG.info("End checkpoint for " + registration.getAddress());
5357          getFSImage().endCheckpoint(sig);
5358          success = true;
5359        } finally {
5360          readUnlock();
5361          RetryCache.setState(cacheEntry, success);
5362        }
5363      }
5364    
5365      PermissionStatus createFsOwnerPermissions(FsPermission permission) {
5366        return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
5367      }
5368    
5369      private void checkOwner(FSPermissionChecker pc, String path)
5370          throws AccessControlException, UnresolvedLinkException {
5371        checkPermission(pc, path, true, null, null, null, null);
5372      }
5373    
5374      private void checkPathAccess(FSPermissionChecker pc,
5375          String path, FsAction access) throws AccessControlException,
5376          UnresolvedLinkException {
5377        checkPermission(pc, path, false, null, null, access, null);
5378      }
5379    
5380      private void checkParentAccess(FSPermissionChecker pc,
5381          String path, FsAction access) throws AccessControlException,
5382          UnresolvedLinkException {
5383        checkPermission(pc, path, false, null, access, null, null);
5384      }
5385    
5386      private void checkAncestorAccess(FSPermissionChecker pc,
5387          String path, FsAction access) throws AccessControlException,
5388          UnresolvedLinkException {
5389        checkPermission(pc, path, false, access, null, null, null);
5390      }
5391    
5392      private void checkTraverse(FSPermissionChecker pc, String path)
5393          throws AccessControlException, UnresolvedLinkException {
5394        checkPermission(pc, path, false, null, null, null, null);
5395      }
5396    
5397      @Override
5398      public void checkSuperuserPrivilege()
5399          throws AccessControlException {
5400        if (isPermissionEnabled) {
5401          FSPermissionChecker pc = getPermissionChecker();
5402          pc.checkSuperuserPrivilege();
5403        }
5404      }
5405    
5406      /**
5407       * Check whether current user have permissions to access the path. For more
5408       * details of the parameters, see
5409       * {@link FSPermissionChecker#checkPermission()}.
5410       */
5411      private void checkPermission(FSPermissionChecker pc,
5412          String path, boolean doCheckOwner, FsAction ancestorAccess,
5413          FsAction parentAccess, FsAction access, FsAction subAccess)
5414          throws AccessControlException, UnresolvedLinkException {
5415            checkPermission(pc, path, doCheckOwner, ancestorAccess,
5416                parentAccess, access, subAccess, true);
5417      }
5418    
5419      /**
5420       * Check whether current user have permissions to access the path. For more
5421       * details of the parameters, see
5422       * {@link FSPermissionChecker#checkPermission()}.
5423       */
5424      private void checkPermission(FSPermissionChecker pc,
5425          String path, boolean doCheckOwner, FsAction ancestorAccess,
5426          FsAction parentAccess, FsAction access, FsAction subAccess,
5427          boolean resolveLink)
5428          throws AccessControlException, UnresolvedLinkException {
5429        if (!pc.isSuperUser()) {
5430          dir.waitForReady();
5431          readLock();
5432          try {
5433            pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess,
5434                parentAccess, access, subAccess, resolveLink);
5435          } finally {
5436            readUnlock();
5437          }
5438        }
5439      }
5440      
5441      /**
5442       * Check to see if we have exceeded the limit on the number
5443       * of inodes.
5444       */
5445      void checkFsObjectLimit() throws IOException {
5446        if (maxFsObjects != 0 &&
5447            maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
5448          throw new IOException("Exceeded the configured number of objects " +
5449                                 maxFsObjects + " in the filesystem.");
5450        }
5451      }
5452    
5453      /**
5454       * Get the total number of objects in the system. 
5455       */
5456      @Override // FSNamesystemMBean
5457      public long getMaxObjects() {
5458        return maxFsObjects;
5459      }
5460    
5461      @Override // FSNamesystemMBean
5462      @Metric
5463      public long getFilesTotal() {
5464        readLock();
5465        try {
5466          return this.dir.totalInodes();
5467        } finally {
5468          readUnlock();
5469        }
5470      }
5471    
5472      @Override // FSNamesystemMBean
5473      @Metric
5474      public long getPendingReplicationBlocks() {
5475        return blockManager.getPendingReplicationBlocksCount();
5476      }
5477    
5478      @Override // FSNamesystemMBean
5479      @Metric
5480      public long getUnderReplicatedBlocks() {
5481        return blockManager.getUnderReplicatedBlocksCount();
5482      }
5483    
5484      /** Returns number of blocks with corrupt replicas */
5485      @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
5486      public long getCorruptReplicaBlocks() {
5487        return blockManager.getCorruptReplicaBlocksCount();
5488      }
5489    
5490      @Override // FSNamesystemMBean
5491      @Metric
5492      public long getScheduledReplicationBlocks() {
5493        return blockManager.getScheduledReplicationBlocksCount();
5494      }
5495    
5496      @Metric
5497      public long getPendingDeletionBlocks() {
5498        return blockManager.getPendingDeletionBlocksCount();
5499      }
5500    
5501      @Metric
5502      public long getExcessBlocks() {
5503        return blockManager.getExcessBlocksCount();
5504      }
5505      
5506      // HA-only metric
5507      @Metric
5508      public long getPostponedMisreplicatedBlocks() {
5509        return blockManager.getPostponedMisreplicatedBlocksCount();
5510      }
5511    
5512      // HA-only metric
5513      @Metric
5514      public int getPendingDataNodeMessageCount() {
5515        return blockManager.getPendingDataNodeMessageCount();
5516      }
5517      
5518      // HA-only metric
5519      @Metric
5520      public String getHAState() {
5521        return haContext.getState().toString();
5522      }
5523    
5524      // HA-only metric
5525      @Metric
5526      public long getMillisSinceLastLoadedEdits() {
5527        if (isInStandbyState() && editLogTailer != null) {
5528          return now() - editLogTailer.getLastLoadTimestamp();
5529        } else {
5530          return 0;
5531        }
5532      }
5533      
5534      @Metric
5535      public int getBlockCapacity() {
5536        return blockManager.getCapacity();
5537      }
5538    
5539      @Override // FSNamesystemMBean
5540      public String getFSState() {
5541        return isInSafeMode() ? "safeMode" : "Operational";
5542      }
5543      
5544      private ObjectName mbeanName;
5545      private ObjectName mxbeanName;
5546    
5547      /**
5548       * Register the FSNamesystem MBean using the name
5549       *        "hadoop:service=NameNode,name=FSNamesystemState"
5550       */
5551      private void registerMBean() {
5552        // We can only implement one MXBean interface, so we keep the old one.
5553        try {
5554          StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
5555          mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
5556        } catch (NotCompliantMBeanException e) {
5557          throw new RuntimeException("Bad MBean setup", e);
5558        }
5559    
5560        LOG.info("Registered FSNamesystemState MBean");
5561      }
5562    
5563      /**
5564       * shutdown FSNamesystem
5565       */
5566      void shutdown() {
5567        if (mbeanName != null) {
5568          MBeans.unregister(mbeanName);
5569          mbeanName = null;
5570        }
5571        if (mxbeanName != null) {
5572          MBeans.unregister(mxbeanName);
5573          mxbeanName = null;
5574        }
5575        if (dir != null) {
5576          dir.shutdown();
5577        }
5578        if (blockManager != null) {
5579          blockManager.shutdown();
5580        }
5581      }
5582      
5583    
5584      @Override // FSNamesystemMBean
5585      public int getNumLiveDataNodes() {
5586        return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
5587      }
5588    
5589      @Override // FSNamesystemMBean
5590      public int getNumDeadDataNodes() {
5591        return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
5592      }
5593      
5594      @Override // FSNamesystemMBean
5595      public int getNumDecomLiveDataNodes() {
5596        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
5597        getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
5598        int liveDecommissioned = 0;
5599        for (DatanodeDescriptor node : live) {
5600          liveDecommissioned += node.isDecommissioned() ? 1 : 0;
5601        }
5602        return liveDecommissioned;
5603      }
5604    
5605      @Override // FSNamesystemMBean
5606      public int getNumDecomDeadDataNodes() {
5607        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
5608        getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
5609        int deadDecommissioned = 0;
5610        for (DatanodeDescriptor node : dead) {
5611          deadDecommissioned += node.isDecommissioned() ? 1 : 0;
5612        }
5613        return deadDecommissioned;
5614      }
5615    
5616      @Override // FSNamesystemMBean
5617      public int getNumDecommissioningDataNodes() {
5618        return getBlockManager().getDatanodeManager().getDecommissioningNodes()
5619            .size();
5620      }
5621    
5622      @Override // FSNamesystemMBean
5623      @Metric({"StaleDataNodes", 
5624        "Number of datanodes marked stale due to delayed heartbeat"})
5625      public int getNumStaleDataNodes() {
5626        return getBlockManager().getDatanodeManager().getNumStaleNodes();
5627      }
5628    
5629      /**
5630       * Sets the current generation stamp for legacy blocks
5631       */
5632      void setGenerationStampV1(long stamp) {
5633        generationStampV1.setCurrentValue(stamp);
5634      }
5635    
5636      /**
5637       * Gets the current generation stamp for legacy blocks
5638       */
5639      long getGenerationStampV1() {
5640        return generationStampV1.getCurrentValue();
5641      }
5642    
5643      /**
5644       * Gets the current generation stamp for this filesystem
5645       */
5646      void setGenerationStampV2(long stamp) {
5647        generationStampV2.setCurrentValue(stamp);
5648      }
5649    
5650      /**
5651       * Gets the current generation stamp for this filesystem
5652       */
5653      long getGenerationStampV2() {
5654        return generationStampV2.getCurrentValue();
5655      }
5656    
5657      /**
5658       * Upgrades the generation stamp for the filesystem
5659       * by reserving a sufficient range for all existing blocks.
5660       * Should be invoked only during the first upgrade to
5661       * sequential block IDs.
5662       */
5663      long upgradeGenerationStampToV2() {
5664        Preconditions.checkState(generationStampV2.getCurrentValue() ==
5665            GenerationStamp.LAST_RESERVED_STAMP);
5666    
5667        generationStampV2.skipTo(
5668            generationStampV1.getCurrentValue() +
5669            HdfsConstants.RESERVED_GENERATION_STAMPS_V1);
5670    
5671        generationStampV1Limit = generationStampV2.getCurrentValue();
5672        return generationStampV2.getCurrentValue();
5673      }
5674    
5675      /**
5676       * Sets the generation stamp that delineates random and sequentially
5677       * allocated block IDs.
5678       * @param stamp
5679       */
5680      void setGenerationStampV1Limit(long stamp) {
5681        Preconditions.checkState(generationStampV1Limit ==
5682                                 GenerationStamp.GRANDFATHER_GENERATION_STAMP);
5683        generationStampV1Limit = stamp;
5684      }
5685    
5686      /**
5687       * Gets the value of the generation stamp that delineates sequential
5688       * and random block IDs.
5689       */
5690      long getGenerationStampAtblockIdSwitch() {
5691        return generationStampV1Limit;
5692      }
5693    
5694      @VisibleForTesting
5695      SequentialBlockIdGenerator getBlockIdGenerator() {
5696        return blockIdGenerator;
5697      }
5698    
5699      /**
5700       * Sets the maximum allocated block ID for this filesystem. This is
5701       * the basis for allocating new block IDs.
5702       */
5703      void setLastAllocatedBlockId(long blockId) {
5704        blockIdGenerator.skipTo(blockId);
5705      }
5706    
5707      /**
5708       * Gets the maximum sequentially allocated block ID for this filesystem
5709       */
5710      long getLastAllocatedBlockId() {
5711        return blockIdGenerator.getCurrentValue();
5712      }
5713    
5714      /**
5715       * Increments, logs and then returns the stamp
5716       */
5717      long nextGenerationStamp(boolean legacyBlock)
5718          throws IOException, SafeModeException {
5719        assert hasWriteLock();
5720        checkNameNodeSafeMode("Cannot get next generation stamp");
5721    
5722        long gs;
5723        if (legacyBlock) {
5724          gs = getNextGenerationStampV1();
5725          getEditLog().logGenerationStampV1(gs);
5726        } else {
5727          gs = getNextGenerationStampV2();
5728          getEditLog().logGenerationStampV2(gs);
5729        }
5730    
5731        // NB: callers sync the log
5732        return gs;
5733      }
5734    
5735      @VisibleForTesting
5736      long getNextGenerationStampV1() throws IOException {
5737        long genStampV1 = generationStampV1.nextValue();
5738    
5739        if (genStampV1 >= generationStampV1Limit) {
5740          // We ran out of generation stamps for legacy blocks. In practice, it
5741          // is extremely unlikely as we reserved 1T v1 generation stamps. The
5742          // result is that we can no longer append to the legacy blocks that
5743          // were created before the upgrade to sequential block IDs.
5744          throw new OutOfV1GenerationStampsException();
5745        }
5746    
5747        return genStampV1;
5748      }
5749    
5750      @VisibleForTesting
5751      long getNextGenerationStampV2() {
5752        return generationStampV2.nextValue();
5753      }
5754    
5755      long getGenerationStampV1Limit() {
5756        return generationStampV1Limit;
5757      }
5758    
5759      /**
5760       * Determine whether the block ID was randomly generated (legacy) or
5761       * sequentially generated. The generation stamp value is used to
5762       * make the distinction.
5763       * @param block
5764       * @return true if the block ID was randomly generated, false otherwise.
5765       */
5766      boolean isLegacyBlock(Block block) {
5767        return block.getGenerationStamp() < getGenerationStampV1Limit();
5768      }
5769    
5770      /**
5771       * Increments, logs and then returns the block ID
5772       */
5773      private long nextBlockId() throws IOException {
5774        assert hasWriteLock();
5775        checkNameNodeSafeMode("Cannot get next block ID");
5776        final long blockId = blockIdGenerator.nextValue();
5777        getEditLog().logAllocateBlockId(blockId);
5778        // NB: callers sync the log
5779        return blockId;
5780      }
5781    
5782      private INodeFileUnderConstruction checkUCBlock(ExtendedBlock block,
5783          String clientName) throws IOException {
5784        assert hasWriteLock();
5785        checkNameNodeSafeMode("Cannot get a new generation stamp and an "
5786            + "access token for block " + block);
5787        
5788        // check stored block state
5789        BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
5790        if (storedBlock == null || 
5791            storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
5792            throw new IOException(block + 
5793                " does not exist or is not under Construction" + storedBlock);
5794        }
5795        
5796        // check file inode
5797        final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
5798        if (file==null || !file.isUnderConstruction()) {
5799          throw new IOException("The file " + storedBlock + 
5800              " belonged to does not exist or it is not under construction.");
5801        }
5802        
5803        // check lease
5804        INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)file;
5805        if (clientName == null || !clientName.equals(pendingFile.getClientName())) {
5806          throw new LeaseExpiredException("Lease mismatch: " + block + 
5807              " is accessed by a non lease holder " + clientName); 
5808        }
5809    
5810        return pendingFile;
5811      }
5812      
5813      /**
5814       * Client is reporting some bad block locations.
5815       */
5816      void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
5817        checkOperation(OperationCategory.WRITE);
5818        NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
5819        writeLock();
5820        try {
5821          checkOperation(OperationCategory.WRITE);
5822          for (int i = 0; i < blocks.length; i++) {
5823            ExtendedBlock blk = blocks[i].getBlock();
5824            DatanodeInfo[] nodes = blocks[i].getLocations();
5825            String[] storageIDs = blocks[i].getStorageIDs();
5826            for (int j = 0; j < nodes.length; j++) {
5827              blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
5828                  storageIDs == null ? null: storageIDs[j], 
5829                  "client machine reported it");
5830            }
5831          }
5832        } finally {
5833          writeUnlock();
5834        }
5835      }
5836    
5837      /**
5838       * Get a new generation stamp together with an access token for 
5839       * a block under construction
5840       * 
5841       * This method is called for recovering a failed pipeline or setting up
5842       * a pipeline to append to a block.
5843       * 
5844       * @param block a block
5845       * @param clientName the name of a client
5846       * @return a located block with a new generation stamp and an access token
5847       * @throws IOException if any error occurs
5848       */
5849      LocatedBlock updateBlockForPipeline(ExtendedBlock block, 
5850          String clientName) throws IOException {
5851        LocatedBlock locatedBlock;
5852        checkOperation(OperationCategory.WRITE);
5853        writeLock();
5854        try {
5855          checkOperation(OperationCategory.WRITE);
5856    
5857          // check vadility of parameters
5858          checkUCBlock(block, clientName);
5859      
5860          // get a new generation stamp and an access token
5861          block.setGenerationStamp(
5862              nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
5863          locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
5864          blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
5865        } finally {
5866          writeUnlock();
5867        }
5868        // Ensure we record the new generation stamp
5869        getEditLog().logSync();
5870        return locatedBlock;
5871      }
5872      
5873      /**
5874       * Update a pipeline for a block under construction
5875       * 
5876       * @param clientName the name of the client
5877       * @param oldBlock and old block
5878       * @param newBlock a new block with a new generation stamp and length
5879       * @param newNodes datanodes in the pipeline
5880       * @throws IOException if any error occurs
5881       */
5882      void updatePipeline(String clientName, ExtendedBlock oldBlock, 
5883          ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
5884          throws IOException {
5885        checkOperation(OperationCategory.WRITE);
5886        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
5887        if (cacheEntry != null && cacheEntry.isSuccess()) {
5888          return; // Return previous response
5889        }
5890        LOG.info("updatePipeline(block=" + oldBlock
5891                 + ", newGenerationStamp=" + newBlock.getGenerationStamp()
5892                 + ", newLength=" + newBlock.getNumBytes()
5893                 + ", newNodes=" + Arrays.asList(newNodes)
5894                 + ", clientName=" + clientName
5895                 + ")");
5896        writeLock();
5897        boolean success = false;
5898        try {
5899          checkOperation(OperationCategory.WRITE);
5900          checkNameNodeSafeMode("Pipeline not updated");
5901          assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
5902            + oldBlock + " has different block identifier";
5903          updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
5904              newStorageIDs, cacheEntry != null);
5905          success = true;
5906        } finally {
5907          writeUnlock();
5908          RetryCache.setState(cacheEntry, success);
5909        }
5910        getEditLog().logSync();
5911        LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
5912      }
5913    
5914      /** @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[]) */
5915      private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 
5916          ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
5917          boolean logRetryCache)
5918          throws IOException {
5919        assert hasWriteLock();
5920        // check the vadility of the block and lease holder name
5921        final INodeFileUnderConstruction pendingFile
5922            = checkUCBlock(oldBlock, clientName);
5923        final BlockInfoUnderConstruction blockinfo
5924            = (BlockInfoUnderConstruction)pendingFile.getLastBlock();
5925    
5926        // check new GS & length: this is not expected
5927        if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
5928            newBlock.getNumBytes() < blockinfo.getNumBytes()) {
5929          String msg = "Update " + oldBlock + " (len = " + 
5930            blockinfo.getNumBytes() + ") to an older state: " + newBlock + 
5931            " (len = " + newBlock.getNumBytes() +")";
5932          LOG.warn(msg);
5933          throw new IOException(msg);
5934        }
5935    
5936        // Update old block with the new generation stamp and new length
5937        blockinfo.setNumBytes(newBlock.getNumBytes());
5938        blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());
5939    
5940        // find the DatanodeDescriptor objects
5941        final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
5942            .getDatanodeStorageInfos(newNodes, newStorageIDs);
5943        blockinfo.setExpectedLocations(storages);
5944    
5945        String src = leaseManager.findPath(pendingFile);
5946        dir.persistBlocks(src, pendingFile, logRetryCache);
5947      }
5948    
5949      // rename was successful. If any part of the renamed subtree had
5950      // files that were being written to, update with new filename.
5951      void unprotectedChangeLease(String src, String dst) {
5952        assert hasWriteLock();
5953        leaseManager.changeLease(src, dst);
5954      }
5955    
5956      /**
5957       * Serializes leases. 
5958       */
5959      void saveFilesUnderConstruction(DataOutputStream out,
5960          Map<Long, INodeFileUnderConstruction> snapshotUCMap) throws IOException {
5961        // This is run by an inferior thread of saveNamespace, which holds a read
5962        // lock on our behalf. If we took the read lock here, we could block
5963        // for fairness if a writer is waiting on the lock.
5964        synchronized (leaseManager) {
5965          Map<String, INodeFileUnderConstruction> nodes =
5966              leaseManager.getINodesUnderConstruction();
5967          for (Map.Entry<String, INodeFileUnderConstruction> entry
5968              : nodes.entrySet()) {
5969            // TODO: for HDFS-5428, because of rename operations, some
5970            // under-construction files that are
5971            // in the current fs directory can also be captured in the
5972            // snapshotUCMap. We should remove them from the snapshotUCMap.
5973            snapshotUCMap.remove(entry.getValue().getId());
5974          }
5975          
5976          out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size    
5977          for (Map.Entry<String, INodeFileUnderConstruction> entry
5978               : nodes.entrySet()) {
5979            FSImageSerialization.writeINodeUnderConstruction(
5980                out, entry.getValue(), entry.getKey());
5981          }
5982          for (Map.Entry<Long, INodeFileUnderConstruction> entry
5983              : snapshotUCMap.entrySet()) {
5984            // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
5985            // as their paths
5986            StringBuilder b = new StringBuilder();
5987            b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
5988                .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
5989                .append(Path.SEPARATOR).append(entry.getValue().getId());
5990            FSImageSerialization.writeINodeUnderConstruction(
5991                out, entry.getValue(), b.toString());
5992          }
5993        }
5994      }
5995    
5996      /**
5997       * Register a Backup name-node, verifying that it belongs
5998       * to the correct namespace, and adding it to the set of
5999       * active journals if necessary.
6000       * 
6001       * @param bnReg registration of the new BackupNode
6002       * @param nnReg registration of this NameNode
6003       * @throws IOException if the namespace IDs do not match
6004       */
6005      void registerBackupNode(NamenodeRegistration bnReg,
6006          NamenodeRegistration nnReg) throws IOException {
6007        writeLock();
6008        try {
6009          if(getFSImage().getStorage().getNamespaceID() 
6010             != bnReg.getNamespaceID())
6011            throw new IOException("Incompatible namespaceIDs: "
6012                + " Namenode namespaceID = "
6013                + getFSImage().getStorage().getNamespaceID() + "; "
6014                + bnReg.getRole() +
6015                " node namespaceID = " + bnReg.getNamespaceID());
6016          if (bnReg.getRole() == NamenodeRole.BACKUP) {
6017            getFSImage().getEditLog().registerBackupNode(
6018                bnReg, nnReg);
6019          }
6020        } finally {
6021          writeUnlock();
6022        }
6023      }
6024    
6025      /**
6026       * Release (unregister) backup node.
6027       * <p>
6028       * Find and remove the backup stream corresponding to the node.
6029       * @param registration
6030       * @throws IOException
6031       */
6032      void releaseBackupNode(NamenodeRegistration registration)
6033        throws IOException {
6034        checkOperation(OperationCategory.WRITE);
6035        writeLock();
6036        try {
6037          checkOperation(OperationCategory.WRITE);
6038          if(getFSImage().getStorage().getNamespaceID()
6039             != registration.getNamespaceID())
6040            throw new IOException("Incompatible namespaceIDs: "
6041                + " Namenode namespaceID = "
6042                + getFSImage().getStorage().getNamespaceID() + "; "
6043                + registration.getRole() +
6044                " node namespaceID = " + registration.getNamespaceID());
6045          getEditLog().releaseBackupStream(registration);
6046        } finally {
6047          writeUnlock();
6048        }
6049      }
6050    
6051      static class CorruptFileBlockInfo {
6052        String path;
6053        Block block;
6054        
6055        public CorruptFileBlockInfo(String p, Block b) {
6056          path = p;
6057          block = b;
6058        }
6059        
6060        @Override
6061        public String toString() {
6062          return block.getBlockName() + "\t" + path;
6063        }
6064      }
6065      /**
6066       * @param path Restrict corrupt files to this portion of namespace.
6067       * @param startBlockAfter Support for continuation; the set of files we return
6068       *  back is ordered by blockid; startBlockAfter tells where to start from
6069       * @return a list in which each entry describes a corrupt file/block
6070       * @throws AccessControlException
6071       * @throws IOException
6072       */
6073      Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
6074      String[] cookieTab) throws IOException {
6075        checkSuperuserPrivilege();
6076        checkOperation(OperationCategory.READ);
6077        readLock();
6078        try {
6079          checkOperation(OperationCategory.READ);
6080          if (!isPopulatingReplQueues()) {
6081            throw new IOException("Cannot run listCorruptFileBlocks because " +
6082                                  "replication queues have not been initialized.");
6083          }
6084          // print a limited # of corrupt files per call
6085          int count = 0;
6086          ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();
6087    
6088          final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();
6089    
6090          if (cookieTab == null) {
6091            cookieTab = new String[] { null };
6092          }
6093          int skip = getIntCookie(cookieTab[0]);
6094          for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
6095            blkIterator.next();
6096          }
6097    
6098          while (blkIterator.hasNext()) {
6099            Block blk = blkIterator.next();
6100            final INode inode = (INode)blockManager.getBlockCollection(blk);
6101            skip++;
6102            if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
6103              String src = FSDirectory.getFullPathName(inode);
6104              if (src.startsWith(path)){
6105                corruptFiles.add(new CorruptFileBlockInfo(src, blk));
6106                count++;
6107                if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
6108                  break;
6109              }
6110            }
6111          }
6112          cookieTab[0] = String.valueOf(skip);
6113          LOG.info("list corrupt file blocks returned: " + count);
6114          return corruptFiles;
6115        } finally {
6116          readUnlock();
6117        }
6118      }
6119    
6120      /**
6121       * Convert string cookie to integer.
6122       */
6123      private static int getIntCookie(String cookie){
6124        int c;
6125        if(cookie == null){
6126          c = 0;
6127        } else {
6128          try{
6129            c = Integer.parseInt(cookie);
6130          }catch (NumberFormatException e) {
6131            c = 0;
6132          }
6133        }
6134        c = Math.max(0, c);
6135        return c;
6136      }
6137    
6138      /**
6139       * Create delegation token secret manager
6140       */
6141      private DelegationTokenSecretManager createDelegationTokenSecretManager(
6142          Configuration conf) {
6143        return new DelegationTokenSecretManager(conf.getLong(
6144            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
6145            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
6146            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
6147                DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
6148            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
6149                DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
6150            DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
6151            conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
6152                DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
6153            this);
6154      }
6155    
6156      /**
6157       * Returns the DelegationTokenSecretManager instance in the namesystem.
6158       * @return delegation token secret manager object
6159       */
6160      DelegationTokenSecretManager getDelegationTokenSecretManager() {
6161        return dtSecretManager;
6162      }
6163    
6164      /**
6165       * @param renewer
6166       * @return Token<DelegationTokenIdentifier>
6167       * @throws IOException
6168       */
6169      Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
6170          throws IOException {
6171        Token<DelegationTokenIdentifier> token;
6172        checkOperation(OperationCategory.WRITE);
6173        writeLock();
6174        try {
6175          checkOperation(OperationCategory.WRITE);
6176          checkNameNodeSafeMode("Cannot issue delegation token");
6177          if (!isAllowedDelegationTokenOp()) {
6178            throw new IOException(
6179              "Delegation Token can be issued only with kerberos or web authentication");
6180          }
6181          if (dtSecretManager == null || !dtSecretManager.isRunning()) {
6182            LOG.warn("trying to get DT with no secret manager running");
6183            return null;
6184          }
6185    
6186          UserGroupInformation ugi = getRemoteUser();
6187          String user = ugi.getUserName();
6188          Text owner = new Text(user);
6189          Text realUser = null;
6190          if (ugi.getRealUser() != null) {
6191            realUser = new Text(ugi.getRealUser().getUserName());
6192          }
6193          DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
6194            renewer, realUser);
6195          token = new Token<DelegationTokenIdentifier>(
6196            dtId, dtSecretManager);
6197          long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
6198          getEditLog().logGetDelegationToken(dtId, expiryTime);
6199        } finally {
6200          writeUnlock();
6201        }
6202        getEditLog().logSync();
6203        return token;
6204      }
6205    
6206      /**
6207       * 
6208       * @param token
6209       * @return New expiryTime of the token
6210       * @throws InvalidToken
6211       * @throws IOException
6212       */
6213      long renewDelegationToken(Token<DelegationTokenIdentifier> token)
6214          throws InvalidToken, IOException {
6215        long expiryTime;
6216        checkOperation(OperationCategory.WRITE);
6217        writeLock();
6218        try {
6219          checkOperation(OperationCategory.WRITE);
6220    
6221          checkNameNodeSafeMode("Cannot renew delegation token");
6222          if (!isAllowedDelegationTokenOp()) {
6223            throw new IOException(
6224                "Delegation Token can be renewed only with kerberos or web authentication");
6225          }
6226          String renewer = getRemoteUser().getShortUserName();
6227          expiryTime = dtSecretManager.renewToken(token, renewer);
6228          DelegationTokenIdentifier id = new DelegationTokenIdentifier();
6229          ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
6230          DataInputStream in = new DataInputStream(buf);
6231          id.readFields(in);
6232          getEditLog().logRenewDelegationToken(id, expiryTime);
6233        } finally {
6234          writeUnlock();
6235        }
6236        getEditLog().logSync();
6237        return expiryTime;
6238      }
6239    
6240      /**
6241       * 
6242       * @param token
6243       * @throws IOException
6244       */
6245      void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
6246          throws IOException {
6247        checkOperation(OperationCategory.WRITE);
6248        writeLock();
6249        try {
6250          checkOperation(OperationCategory.WRITE);
6251    
6252          checkNameNodeSafeMode("Cannot cancel delegation token");
6253          String canceller = getRemoteUser().getUserName();
6254          DelegationTokenIdentifier id = dtSecretManager
6255            .cancelToken(token, canceller);
6256          getEditLog().logCancelDelegationToken(id);
6257        } finally {
6258          writeUnlock();
6259        }
6260        getEditLog().logSync();
6261      }
6262      
6263      /**
6264       * @param out save state of the secret manager
6265       * @param sdPath String storage directory path
6266       */
6267      void saveSecretManagerState(DataOutputStream out, String sdPath)
6268          throws IOException {
6269        dtSecretManager.saveSecretManagerState(out, sdPath);
6270      }
6271    
6272      /**
6273       * @param in load the state of secret manager from input stream
6274       */
6275      void loadSecretManagerState(DataInput in) throws IOException {
6276        dtSecretManager.loadSecretManagerState(in);
6277      }
6278    
6279      /**
6280       * Log the updateMasterKey operation to edit logs
6281       * 
6282       * @param key new delegation key.
6283       */
6284      public void logUpdateMasterKey(DelegationKey key) {
6285        
6286        assert !isInSafeMode() :
6287          "this should never be called while in safemode, since we stop " +
6288          "the DT manager before entering safemode!";
6289        // No need to hold FSN lock since we don't access any internal
6290        // structures, and this is stopped before the FSN shuts itself
6291        // down, etc.
6292        getEditLog().logUpdateMasterKey(key);
6293        getEditLog().logSync();
6294      }
6295      
6296      /**
6297       * Log the cancellation of expired tokens to edit logs
6298       * 
6299       * @param id token identifier to cancel
6300       */
6301      public void logExpireDelegationToken(DelegationTokenIdentifier id) {
6302        assert !isInSafeMode() :
6303          "this should never be called while in safemode, since we stop " +
6304          "the DT manager before entering safemode!";
6305        // No need to hold FSN lock since we don't access any internal
6306        // structures, and this is stopped before the FSN shuts itself
6307        // down, etc.
6308        getEditLog().logCancelDelegationToken(id);
6309      }  
6310      
6311      private void logReassignLease(String leaseHolder, String src,
6312          String newHolder) {
6313        assert hasWriteLock();
6314        getEditLog().logReassignLease(leaseHolder, src, newHolder);
6315      }
6316      
6317      /**
6318       * 
6319       * @return true if delegation token operation is allowed
6320       */
6321      private boolean isAllowedDelegationTokenOp() throws IOException {
6322        AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
6323        if (UserGroupInformation.isSecurityEnabled()
6324            && (authMethod != AuthenticationMethod.KERBEROS)
6325            && (authMethod != AuthenticationMethod.KERBEROS_SSL)
6326            && (authMethod != AuthenticationMethod.CERTIFICATE)) {
6327          return false;
6328        }
6329        return true;
6330      }
6331      
6332      /**
6333       * Returns authentication method used to establish the connection
6334       * @return AuthenticationMethod used to establish connection
6335       * @throws IOException
6336       */
6337      private AuthenticationMethod getConnectionAuthenticationMethod()
6338          throws IOException {
6339        UserGroupInformation ugi = getRemoteUser();
6340        AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
6341        if (authMethod == AuthenticationMethod.PROXY) {
6342          authMethod = ugi.getRealUser().getAuthenticationMethod();
6343        }
6344        return authMethod;
6345      }
6346      
6347      /**
6348       * Client invoked methods are invoked over RPC and will be in 
6349       * RPC call context even if the client exits.
6350       */
6351      private boolean isExternalInvocation() {
6352        return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
6353      }
6354    
6355      private static InetAddress getRemoteIp() {
6356        InetAddress ip = Server.getRemoteIp();
6357        if (ip != null) {
6358          return ip;
6359        }
6360        return NamenodeWebHdfsMethods.getRemoteIp();
6361      }
6362      
6363      // optimize ugi lookup for RPC operations to avoid a trip through
6364      // UGI.getCurrentUser which is synch'ed
6365      private static UserGroupInformation getRemoteUser() throws IOException {
6366        return NameNode.getRemoteUser();
6367      }
6368      
6369      /**
6370       * Log fsck event in the audit log 
6371       */
6372      void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
6373        if (isAuditEnabled()) {
6374          logAuditEvent(true, getRemoteUser(),
6375                        remoteAddress,
6376                        "fsck", src, null, null);
6377        }
6378      }
6379      /**
6380       * Register NameNodeMXBean
6381       */
6382      private void registerMXBean() {
6383        mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
6384      }
6385    
6386      /**
6387       * Class representing Namenode information for JMX interfaces
6388       */
6389      @Override // NameNodeMXBean
6390      public String getVersion() {
6391        return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
6392      }
6393    
6394      @Override // NameNodeMXBean
6395      public long getUsed() {
6396        return this.getCapacityUsed();
6397      }
6398    
6399      @Override // NameNodeMXBean
6400      public long getFree() {
6401        return this.getCapacityRemaining();
6402      }
6403    
6404      @Override // NameNodeMXBean
6405      public long getTotal() {
6406        return this.getCapacityTotal();
6407      }
6408    
6409      @Override // NameNodeMXBean
6410      public String getSafemode() {
6411        if (!this.isInSafeMode())
6412          return "";
6413        return "Safe mode is ON. " + this.getSafeModeTip();
6414      }
6415    
6416      @Override // NameNodeMXBean
6417      public boolean isUpgradeFinalized() {
6418        return this.getFSImage().isUpgradeFinalized();
6419      }
6420    
6421      @Override // NameNodeMXBean
6422      public long getNonDfsUsedSpace() {
6423        return datanodeStatistics.getCapacityUsedNonDFS();
6424      }
6425    
6426      @Override // NameNodeMXBean
6427      public float getPercentUsed() {
6428        return datanodeStatistics.getCapacityUsedPercent();
6429      }
6430    
6431      @Override // NameNodeMXBean
6432      public long getBlockPoolUsedSpace() {
6433        return datanodeStatistics.getBlockPoolUsed();
6434      }
6435    
6436      @Override // NameNodeMXBean
6437      public float getPercentBlockPoolUsed() {
6438        return datanodeStatistics.getPercentBlockPoolUsed();
6439      }
6440    
6441      @Override // NameNodeMXBean
6442      public float getPercentRemaining() {
6443        return datanodeStatistics.getCapacityRemainingPercent();
6444      }
6445    
6446      @Override // NameNodeMXBean
6447      public long getCacheCapacity() {
6448        return datanodeStatistics.getCacheCapacity();
6449      }
6450    
6451      @Override // NameNodeMXBean
6452      public long getCacheUsed() {
6453        return datanodeStatistics.getCacheUsed();
6454      }
6455    
6456      @Override // NameNodeMXBean
6457      public long getTotalBlocks() {
6458        return getBlocksTotal();
6459      }
6460    
6461      @Override // NameNodeMXBean
6462      @Metric
6463      public long getTotalFiles() {
6464        return getFilesTotal();
6465      }
6466    
6467      @Override // NameNodeMXBean
6468      public long getNumberOfMissingBlocks() {
6469        return getMissingBlocksCount();
6470      }
6471      
6472      @Override // NameNodeMXBean
6473      public int getThreads() {
6474        return ManagementFactory.getThreadMXBean().getThreadCount();
6475      }
6476    
6477      /**
6478       * Returned information is a JSON representation of map with host name as the
6479       * key and value is a map of live node attribute keys to its values
6480       */
6481      @Override // NameNodeMXBean
6482      public String getLiveNodes() {
6483        final Map<String, Map<String,Object>> info = 
6484          new HashMap<String, Map<String,Object>>();
6485        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6486        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6487        for (DatanodeDescriptor node : live) {
6488          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6489              .put("infoAddr", node.getInfoAddr())
6490              .put("infoSecureAddr", node.getInfoSecureAddr())
6491              .put("xferaddr", node.getXferAddr())
6492              .put("lastContact", getLastContact(node))
6493              .put("usedSpace", getDfsUsed(node))
6494              .put("adminState", node.getAdminState().toString())
6495              .put("nonDfsUsedSpace", node.getNonDfsUsed())
6496              .put("capacity", node.getCapacity())
6497              .put("numBlocks", node.numBlocks())
6498              .put("version", node.getSoftwareVersion())
6499              .put("used", node.getDfsUsed())
6500              .put("remaining", node.getRemaining())
6501              .put("blockScheduled", node.getBlocksScheduled())
6502              .put("blockPoolUsed", node.getBlockPoolUsed())
6503              .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
6504              .put("volfails", node.getVolumeFailures())
6505              .build();
6506    
6507          info.put(node.getHostName(), innerinfo);
6508        }
6509        return JSON.toString(info);
6510      }
6511    
6512      /**
6513       * Returned information is a JSON representation of map with host name as the
6514       * key and value is a map of dead node attribute keys to its values
6515       */
6516      @Override // NameNodeMXBean
6517      public String getDeadNodes() {
6518        final Map<String, Map<String, Object>> info = 
6519          new HashMap<String, Map<String, Object>>();
6520        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
6521        blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
6522        for (DatanodeDescriptor node : dead) {
6523          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6524              .put("lastContact", getLastContact(node))
6525              .put("decommissioned", node.isDecommissioned())
6526              .put("xferaddr", node.getXferAddr())
6527              .build();
6528          info.put(node.getHostName(), innerinfo);
6529        }
6530        return JSON.toString(info);
6531      }
6532    
6533      /**
6534       * Returned information is a JSON representation of map with host name as the
6535       * key and value is a map of decomisioning node attribute keys to its values
6536       */
6537      @Override // NameNodeMXBean
6538      public String getDecomNodes() {
6539        final Map<String, Map<String, Object>> info = 
6540          new HashMap<String, Map<String, Object>>();
6541        final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
6542            ).getDecommissioningNodes();
6543        for (DatanodeDescriptor node : decomNodeList) {
6544          Map<String, Object> innerinfo = ImmutableMap
6545              .<String, Object> builder()
6546              .put("xferaddr", node.getXferAddr())
6547              .put("underReplicatedBlocks",
6548                  node.decommissioningStatus.getUnderReplicatedBlocks())
6549              .put("decommissionOnlyReplicas",
6550                  node.decommissioningStatus.getDecommissionOnlyReplicas())
6551              .put("underReplicateInOpenFiles",
6552                  node.decommissioningStatus.getUnderReplicatedInOpenFiles())
6553              .build();
6554          info.put(node.getHostName(), innerinfo);
6555        }
6556        return JSON.toString(info);
6557      }
6558    
6559      private long getLastContact(DatanodeDescriptor alivenode) {
6560        return (Time.now() - alivenode.getLastUpdate())/1000;
6561      }
6562    
6563      private long getDfsUsed(DatanodeDescriptor alivenode) {
6564        return alivenode.getDfsUsed();
6565      }
6566    
6567      @Override  // NameNodeMXBean
6568      public String getClusterId() {
6569        return dir.fsImage.getStorage().getClusterID();
6570      }
6571      
6572      @Override  // NameNodeMXBean
6573      public String getBlockPoolId() {
6574        return blockPoolId;
6575      }
6576      
6577      @Override  // NameNodeMXBean
6578      public String getNameDirStatuses() {
6579        Map<String, Map<File, StorageDirType>> statusMap =
6580          new HashMap<String, Map<File, StorageDirType>>();
6581        
6582        Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
6583        for (Iterator<StorageDirectory> it
6584            = getFSImage().getStorage().dirIterator(); it.hasNext();) {
6585          StorageDirectory st = it.next();
6586          activeDirs.put(st.getRoot(), st.getStorageDirType());
6587        }
6588        statusMap.put("active", activeDirs);
6589        
6590        List<Storage.StorageDirectory> removedStorageDirs
6591            = getFSImage().getStorage().getRemovedStorageDirs();
6592        Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
6593        for (StorageDirectory st : removedStorageDirs) {
6594          failedDirs.put(st.getRoot(), st.getStorageDirType());
6595        }
6596        statusMap.put("failed", failedDirs);
6597        
6598        return JSON.toString(statusMap);
6599      }
6600    
6601      @Override // NameNodeMXBean
6602      public String getNodeUsage() {
6603        float median = 0;
6604        float max = 0;
6605        float min = 0;
6606        float dev = 0;
6607    
6608        final Map<String, Map<String,Object>> info =
6609            new HashMap<String, Map<String,Object>>();
6610        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6611        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6612    
6613        if (live.size() > 0) {
6614          float totalDfsUsed = 0;
6615          float[] usages = new float[live.size()];
6616          int i = 0;
6617          for (DatanodeDescriptor dn : live) {
6618            usages[i++] = dn.getDfsUsedPercent();
6619            totalDfsUsed += dn.getDfsUsedPercent();
6620          }
6621          totalDfsUsed /= live.size();
6622          Arrays.sort(usages);
6623          median = usages[usages.length / 2];
6624          max = usages[usages.length - 1];
6625          min = usages[0];
6626    
6627          for (i = 0; i < usages.length; i++) {
6628            dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
6629          }
6630          dev = (float) Math.sqrt(dev / usages.length);
6631        }
6632    
6633        final Map<String, Object> innerInfo = new HashMap<String, Object>();
6634        innerInfo.put("min", StringUtils.format("%.2f%%", min));
6635        innerInfo.put("median", StringUtils.format("%.2f%%", median));
6636        innerInfo.put("max", StringUtils.format("%.2f%%", max));
6637        innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
6638        info.put("nodeUsage", innerInfo);
6639    
6640        return JSON.toString(info);
6641      }
6642    
6643      @Override  // NameNodeMXBean
6644      public String getNameJournalStatus() {
6645        List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
6646        FSEditLog log = getFSImage().getEditLog();
6647        if (log != null) {
6648          boolean openForWrite = log.isOpenForWrite();
6649          for (JournalAndStream jas : log.getJournals()) {
6650            final Map<String, String> jasMap = new HashMap<String, String>();
6651            String manager = jas.getManager().toString();
6652    
6653            jasMap.put("required", String.valueOf(jas.isRequired()));
6654            jasMap.put("disabled", String.valueOf(jas.isDisabled()));
6655            jasMap.put("manager", manager);
6656    
6657            if (jas.isDisabled()) {
6658              jasMap.put("stream", "Failed");
6659            } else if (openForWrite) {
6660              EditLogOutputStream elos = jas.getCurrentStream();
6661              if (elos != null) {
6662                jasMap.put("stream", elos.generateReport());
6663              } else {
6664                jasMap.put("stream", "not currently writing");
6665              }
6666            } else {
6667              jasMap.put("stream", "open for read");
6668            }
6669            jasList.add(jasMap);
6670          }
6671        }
6672        return JSON.toString(jasList);
6673      }
6674    
6675      @Override // NameNodeMxBean
6676      public String getJournalTransactionInfo() {
6677        Map<String, String> txnIdMap = new HashMap<String, String>();
6678        txnIdMap.put("LastAppliedOrWrittenTxId",
6679            Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
6680        txnIdMap.put("MostRecentCheckpointTxId",
6681            Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
6682        return JSON.toString(txnIdMap);
6683      }
6684      
6685      @Override  // NameNodeMXBean
6686      public String getNNStarted() {
6687        return getStartTime().toString();
6688      }
6689    
6690      @Override  // NameNodeMXBean
6691      public String getCompileInfo() {
6692        return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
6693            " from " + VersionInfo.getBranch();
6694      }
6695    
6696      /** @return the block manager. */
6697      public BlockManager getBlockManager() {
6698        return blockManager;
6699      }
6700      /** @return the FSDirectory. */
6701      public FSDirectory getFSDirectory() {
6702        return dir;
6703      }
6704      /** @return the cache manager. */
6705      public CacheManager getCacheManager() {
6706        return cacheManager;
6707      }
6708    
6709      @Override  // NameNodeMXBean
6710      public String getCorruptFiles() {
6711        List<String> list = new ArrayList<String>();
6712        Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
6713        try {
6714          corruptFileBlocks = listCorruptFileBlocks("/", null);
6715          int corruptFileCount = corruptFileBlocks.size();
6716          if (corruptFileCount != 0) {
6717            for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
6718              list.add(c.toString());
6719            }
6720          }
6721        } catch (IOException e) {
6722          LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
6723        }
6724        return JSON.toString(list);
6725      }
6726    
6727      @Override  //NameNodeMXBean
6728      public int getDistinctVersionCount() {
6729        return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
6730          .size();
6731      }
6732    
6733      @Override  //NameNodeMXBean
6734      public Map<String, Integer> getDistinctVersions() {
6735        return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
6736      }
6737    
6738      @Override  //NameNodeMXBean
6739      public String getSoftwareVersion() {
6740        return VersionInfo.getVersion();
6741      }
6742    
6743      /**
6744       * Verifies that the given identifier and password are valid and match.
6745       * @param identifier Token identifier.
6746       * @param password Password in the token.
6747       */
6748      public synchronized void verifyToken(DelegationTokenIdentifier identifier,
6749          byte[] password) throws InvalidToken, RetriableException {
6750        try {
6751          getDelegationTokenSecretManager().verifyToken(identifier, password);
6752        } catch (InvalidToken it) {
6753          if (inTransitionToActive()) {
6754            throw new RetriableException(it);
6755          }
6756          throw it;
6757        }
6758      }
6759      
6760      @Override
6761      public boolean isGenStampInFuture(Block block) {
6762        if (isLegacyBlock(block)) {
6763          return block.getGenerationStamp() > getGenerationStampV1();
6764        } else {
6765          return block.getGenerationStamp() > getGenerationStampV2();
6766        }
6767      }
6768    
6769      @VisibleForTesting
6770      public EditLogTailer getEditLogTailer() {
6771        return editLogTailer;
6772      }
6773      
6774      @VisibleForTesting
6775      public void setEditLogTailerForTests(EditLogTailer tailer) {
6776        this.editLogTailer = tailer;
6777      }
6778      
6779      @VisibleForTesting
6780      void setFsLockForTests(ReentrantReadWriteLock lock) {
6781        this.fsLock.coarseLock = lock;
6782      }
6783      
6784      @VisibleForTesting
6785      ReentrantReadWriteLock getFsLockForTests() {
6786        return fsLock.coarseLock;
6787      }
6788    
6789      @VisibleForTesting
6790      public SafeModeInfo getSafeModeInfoForTests() {
6791        return safeMode;
6792      }
6793      
6794      @VisibleForTesting
6795      public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
6796        this.nnResourceChecker = nnResourceChecker;
6797      }
6798    
6799      @Override
6800      public boolean isAvoidingStaleDataNodesForWrite() {
6801        return this.blockManager.getDatanodeManager()
6802            .shouldAvoidStaleDataNodesForWrite();
6803      }
6804    
6805      @Override // FSClusterStats
6806      public int getNumDatanodesInService() {
6807        return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
6808      }
6809    
6810      public SnapshotManager getSnapshotManager() {
6811        return snapshotManager;
6812      }
6813      
6814      /** Allow snapshot on a directroy. */
6815      void allowSnapshot(String path) throws SafeModeException, IOException {
6816        checkOperation(OperationCategory.WRITE);
6817        writeLock();
6818        try {
6819          checkOperation(OperationCategory.WRITE);
6820          checkNameNodeSafeMode("Cannot allow snapshot for " + path);
6821          checkSuperuserPrivilege();
6822    
6823          dir.writeLock();
6824          try {
6825            snapshotManager.setSnapshottable(path, true);
6826          } finally {
6827            dir.writeUnlock();
6828          }
6829          getEditLog().logAllowSnapshot(path);
6830        } finally {
6831          writeUnlock();
6832        }
6833        getEditLog().logSync();
6834    
6835        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6836          logAuditEvent(true, "allowSnapshot", path, null, null);
6837        }
6838      }
6839      
6840      /** Disallow snapshot on a directory. */
6841      void disallowSnapshot(String path) throws SafeModeException, IOException {
6842        checkOperation(OperationCategory.WRITE);
6843        writeLock();
6844        try {
6845          checkOperation(OperationCategory.WRITE);
6846          checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
6847          checkSuperuserPrivilege();
6848    
6849          dir.writeLock();
6850          try {
6851            snapshotManager.resetSnapshottable(path);
6852          } finally {
6853            dir.writeUnlock();
6854          }
6855          getEditLog().logDisallowSnapshot(path);
6856        } finally {
6857          writeUnlock();
6858        }
6859        getEditLog().logSync();
6860        
6861        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6862          logAuditEvent(true, "disallowSnapshot", path, null, null);
6863        }
6864      }
6865      
6866      /**
6867       * Create a snapshot
6868       * @param snapshotRoot The directory path where the snapshot is taken
6869       * @param snapshotName The name of the snapshot
6870       */
6871      String createSnapshot(String snapshotRoot, String snapshotName)
6872          throws SafeModeException, IOException {
6873        checkOperation(OperationCategory.WRITE);
6874        final FSPermissionChecker pc = getPermissionChecker();
6875        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
6876            null);
6877        if (cacheEntry != null && cacheEntry.isSuccess()) {
6878          return (String) cacheEntry.getPayload();
6879        }
6880        writeLock();
6881        String snapshotPath = null;
6882        try {
6883          checkOperation(OperationCategory.WRITE);
6884          checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
6885          if (isPermissionEnabled) {
6886            checkOwner(pc, snapshotRoot);
6887          }
6888    
6889          if (snapshotName == null || snapshotName.isEmpty()) {
6890            snapshotName = Snapshot.generateDefaultSnapshotName();
6891          }
6892          dir.verifySnapshotName(snapshotName, snapshotRoot);
6893          dir.writeLock();
6894          try {
6895            snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName);
6896          } finally {
6897            dir.writeUnlock();
6898          }
6899          getEditLog().logCreateSnapshot(snapshotRoot, snapshotName,
6900              cacheEntry != null);
6901        } finally {
6902          writeUnlock();
6903          RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath);
6904        }
6905        getEditLog().logSync();
6906        
6907        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6908          logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null);
6909        }
6910        return snapshotPath;
6911      }
6912      
6913      /**
6914       * Rename a snapshot
6915       * @param path The directory path where the snapshot was taken
6916       * @param snapshotOldName Old snapshot name
6917       * @param snapshotNewName New snapshot name
6918       * @throws SafeModeException
6919       * @throws IOException 
6920       */
6921      void renameSnapshot(String path, String snapshotOldName,
6922          String snapshotNewName) throws SafeModeException, IOException {
6923        checkOperation(OperationCategory.WRITE);
6924        final FSPermissionChecker pc = getPermissionChecker();
6925        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
6926        if (cacheEntry != null && cacheEntry.isSuccess()) {
6927          return; // Return previous response
6928        }
6929        writeLock();
6930        boolean success = false;
6931        try {
6932          checkOperation(OperationCategory.WRITE);
6933          checkNameNodeSafeMode("Cannot rename snapshot for " + path);
6934          if (isPermissionEnabled) {
6935            checkOwner(pc, path);
6936          }
6937          dir.verifySnapshotName(snapshotNewName, path);
6938          
6939          snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
6940          getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
6941              cacheEntry != null);
6942          success = true;
6943        } finally {
6944          writeUnlock();
6945          RetryCache.setState(cacheEntry, success);
6946        }
6947        getEditLog().logSync();
6948        
6949        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6950          String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
6951          String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
6952          logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
6953        }
6954      }
6955      
6956      /**
6957       * Get the list of snapshottable directories that are owned 
6958       * by the current user. Return all the snapshottable directories if the 
6959       * current user is a super user.
6960       * @return The list of all the current snapshottable directories
6961       * @throws IOException
6962       */
6963      public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
6964          throws IOException {
6965        SnapshottableDirectoryStatus[] status = null;
6966        checkOperation(OperationCategory.READ);
6967        final FSPermissionChecker checker = getPermissionChecker();
6968        readLock();
6969        try {
6970          checkOperation(OperationCategory.READ);
6971          final String user = checker.isSuperUser()? null : checker.getUser();
6972          status = snapshotManager.getSnapshottableDirListing(user);
6973        } finally {
6974          readUnlock();
6975        }
6976        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6977          logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
6978        }
6979        return status;
6980      }
6981      
6982      /**
6983       * Get the difference between two snapshots (or between a snapshot and the
6984       * current status) of a snapshottable directory.
6985       * 
6986       * @param path The full path of the snapshottable directory.
6987       * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
6988       *          or empty string indicates the current tree.
6989       * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
6990       *          empty string indicates the current tree.
6991       * @return A report about the difference between {@code fromSnapshot} and 
6992       *         {@code toSnapshot}. Modified/deleted/created/renamed files and 
6993       *         directories belonging to the snapshottable directories are listed 
6994       *         and labeled as M/-/+/R respectively. 
6995       * @throws IOException
6996       */
6997      SnapshotDiffReport getSnapshotDiffReport(String path,
6998          String fromSnapshot, String toSnapshot) throws IOException {
6999        SnapshotDiffInfo diffs = null;
7000        checkOperation(OperationCategory.READ);
7001        final FSPermissionChecker pc = getPermissionChecker();
7002        readLock();
7003        try {
7004          checkOperation(OperationCategory.READ);
7005          if (isPermissionEnabled) {
7006            checkSubtreeReadPermission(pc, path, fromSnapshot);
7007            checkSubtreeReadPermission(pc, path, toSnapshot);
7008          }
7009          diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
7010        } finally {
7011          readUnlock();
7012        }
7013        
7014        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7015          logAuditEvent(true, "computeSnapshotDiff", null, null, null);
7016        }
7017        return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
7018            path, fromSnapshot, toSnapshot,
7019            Collections.<DiffReportEntry> emptyList());
7020      }
7021      
7022      private void checkSubtreeReadPermission(final FSPermissionChecker pc,
7023          final String snapshottablePath, final String snapshot)
7024              throws AccessControlException, UnresolvedLinkException {
7025        final String fromPath = snapshot == null?
7026            snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
7027        checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
7028      }
7029      
7030      /**
7031       * Delete a snapshot of a snapshottable directory
7032       * @param snapshotRoot The snapshottable directory
7033       * @param snapshotName The name of the to-be-deleted snapshot
7034       * @throws SafeModeException
7035       * @throws IOException
7036       */
7037      void deleteSnapshot(String snapshotRoot, String snapshotName)
7038          throws SafeModeException, IOException {
7039        checkOperation(OperationCategory.WRITE);
7040        final FSPermissionChecker pc = getPermissionChecker();
7041        
7042        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7043        if (cacheEntry != null && cacheEntry.isSuccess()) {
7044          return; // Return previous response
7045        }
7046        boolean success = false;
7047        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
7048        writeLock();
7049        try {
7050          checkOperation(OperationCategory.WRITE);
7051          checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
7052          if (isPermissionEnabled) {
7053            checkOwner(pc, snapshotRoot);
7054          }
7055    
7056          List<INode> removedINodes = new ChunkedArrayList<INode>();
7057          dir.writeLock();
7058          try {
7059            snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
7060                collectedBlocks, removedINodes);
7061            dir.removeFromInodeMap(removedINodes);
7062          } finally {
7063            dir.writeUnlock();
7064          }
7065          removedINodes.clear();
7066          getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
7067              cacheEntry != null);
7068          success = true;
7069        } finally {
7070          writeUnlock();
7071          RetryCache.setState(cacheEntry, success);
7072        }
7073        getEditLog().logSync();
7074    
7075        removeBlocks(collectedBlocks);
7076        collectedBlocks.clear();
7077    
7078        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7079          String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
7080          logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
7081        }
7082      }
7083    
7084      /**
7085       * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
7086       * @param toRemove the list of INodeDirectorySnapshottable to be removed
7087       */
7088      void removeSnapshottableDirs(List<INodeDirectorySnapshottable> toRemove) {
7089        if (snapshotManager != null) {
7090          snapshotManager.removeSnapshottable(toRemove);
7091        }
7092      }
7093    
7094      long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
7095          throws IOException {
7096        checkOperation(OperationCategory.WRITE);
7097        final FSPermissionChecker pc = isPermissionEnabled ?
7098            getPermissionChecker() : null;
7099        CacheEntryWithPayload cacheEntry =
7100            RetryCache.waitForCompletion(retryCache, null);
7101        if (cacheEntry != null && cacheEntry.isSuccess()) {
7102          return (Long) cacheEntry.getPayload();
7103        }
7104        boolean success = false;
7105        if (!flags.contains(CacheFlag.FORCE)) {
7106          cacheManager.waitForRescanIfNeeded();
7107        }
7108        writeLock();
7109        Long result = null;
7110        try {
7111          checkOperation(OperationCategory.WRITE);
7112          if (isInSafeMode()) {
7113            throw new SafeModeException(
7114                "Cannot add cache directive", safeMode);
7115          }
7116          if (directive.getId() != null) {
7117            throw new IOException("addDirective: you cannot specify an ID " +
7118                "for this operation.");
7119          }
7120          CacheDirectiveInfo effectiveDirective = 
7121              cacheManager.addDirective(directive, pc, flags);
7122          getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
7123              cacheEntry != null);
7124          result = effectiveDirective.getId();
7125          success = true;
7126        } finally {
7127          writeUnlock();
7128          if (success) {
7129            getEditLog().logSync();
7130          }
7131          if (isAuditEnabled() && isExternalInvocation()) {
7132            logAuditEvent(success, "addCacheDirective", null, null, null);
7133          }
7134          RetryCache.setState(cacheEntry, success, result);
7135        }
7136        return result;
7137      }
7138    
7139      void modifyCacheDirective(CacheDirectiveInfo directive,
7140          EnumSet<CacheFlag> flags) throws IOException {
7141        checkOperation(OperationCategory.WRITE);
7142        final FSPermissionChecker pc = isPermissionEnabled ?
7143            getPermissionChecker() : null;
7144        boolean success = false;
7145        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7146        if (cacheEntry != null && cacheEntry.isSuccess()) {
7147          return;
7148        }
7149        if (!flags.contains(CacheFlag.FORCE)) {
7150          cacheManager.waitForRescanIfNeeded();
7151        }
7152        writeLock();
7153        try {
7154          checkOperation(OperationCategory.WRITE);
7155          if (isInSafeMode()) {
7156            throw new SafeModeException(
7157                "Cannot add cache directive", safeMode);
7158          }
7159          cacheManager.modifyDirective(directive, pc, flags);
7160          getEditLog().logModifyCacheDirectiveInfo(directive,
7161              cacheEntry != null);
7162          success = true;
7163        } finally {
7164          writeUnlock();
7165          if (success) {
7166            getEditLog().logSync();
7167          }
7168          if (isAuditEnabled() && isExternalInvocation()) {
7169            logAuditEvent(success, "modifyCacheDirective", null, null, null);
7170          }
7171          RetryCache.setState(cacheEntry, success);
7172        }
7173      }
7174    
7175      void removeCacheDirective(Long id) throws IOException {
7176        checkOperation(OperationCategory.WRITE);
7177        final FSPermissionChecker pc = isPermissionEnabled ?
7178            getPermissionChecker() : null;
7179        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7180        if (cacheEntry != null && cacheEntry.isSuccess()) {
7181          return;
7182        }
7183        boolean success = false;
7184        writeLock();
7185        try {
7186          checkOperation(OperationCategory.WRITE);
7187          if (isInSafeMode()) {
7188            throw new SafeModeException(
7189                "Cannot remove cache directives", safeMode);
7190          }
7191          cacheManager.removeDirective(id, pc);
7192          getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null);
7193          success = true;
7194        } finally {
7195          writeUnlock();
7196          if (isAuditEnabled() && isExternalInvocation()) {
7197            logAuditEvent(success, "removeCacheDirective", null, null,
7198                null);
7199          }
7200          RetryCache.setState(cacheEntry, success);
7201        }
7202        getEditLog().logSync();
7203      }
7204    
7205      BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
7206          long startId, CacheDirectiveInfo filter) throws IOException {
7207        checkOperation(OperationCategory.READ);
7208        final FSPermissionChecker pc = isPermissionEnabled ?
7209            getPermissionChecker() : null;
7210        BatchedListEntries<CacheDirectiveEntry> results;
7211        cacheManager.waitForRescanIfNeeded();
7212        readLock();
7213        boolean success = false;
7214        try {
7215          checkOperation(OperationCategory.READ);
7216          results =
7217              cacheManager.listCacheDirectives(startId, filter, pc);
7218          success = true;
7219        } finally {
7220          readUnlock();
7221          if (isAuditEnabled() && isExternalInvocation()) {
7222            logAuditEvent(success, "listCacheDirectives", null, null,
7223                null);
7224          }
7225        }
7226        return results;
7227      }
7228    
7229      public void addCachePool(CachePoolInfo req) throws IOException {
7230        checkOperation(OperationCategory.WRITE);
7231        final FSPermissionChecker pc = isPermissionEnabled ?
7232            getPermissionChecker() : null;
7233        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7234        if (cacheEntry != null && cacheEntry.isSuccess()) {
7235          return; // Return previous response
7236        }
7237        writeLock();
7238        boolean success = false;
7239        try {
7240          checkOperation(OperationCategory.WRITE);
7241          if (isInSafeMode()) {
7242            throw new SafeModeException(
7243                "Cannot add cache pool " + req.getPoolName(), safeMode);
7244          }
7245          if (pc != null) {
7246            pc.checkSuperuserPrivilege();
7247          }
7248          CachePoolInfo info = cacheManager.addCachePool(req);
7249          getEditLog().logAddCachePool(info, cacheEntry != null);
7250          success = true;
7251        } finally {
7252          writeUnlock();
7253          if (isAuditEnabled() && isExternalInvocation()) {
7254            logAuditEvent(success, "addCachePool", req.getPoolName(), null, null);
7255          }
7256          RetryCache.setState(cacheEntry, success);
7257        }
7258        
7259        getEditLog().logSync();
7260      }
7261    
7262      public void modifyCachePool(CachePoolInfo req) throws IOException {
7263        checkOperation(OperationCategory.WRITE);
7264        final FSPermissionChecker pc =
7265            isPermissionEnabled ? getPermissionChecker() : null;
7266        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7267        if (cacheEntry != null && cacheEntry.isSuccess()) {
7268          return; // Return previous response
7269        }
7270        writeLock();
7271        boolean success = false;
7272        try {
7273          checkOperation(OperationCategory.WRITE);
7274          if (isInSafeMode()) {
7275            throw new SafeModeException(
7276                "Cannot modify cache pool " + req.getPoolName(), safeMode);
7277          }
7278          if (pc != null) {
7279            pc.checkSuperuserPrivilege();
7280          }
7281          cacheManager.modifyCachePool(req);
7282          getEditLog().logModifyCachePool(req, cacheEntry != null);
7283          success = true;
7284        } finally {
7285          writeUnlock();
7286          if (isAuditEnabled() && isExternalInvocation()) {
7287            logAuditEvent(success, "modifyCachePool", req.getPoolName(), null, null);
7288          }
7289          RetryCache.setState(cacheEntry, success);
7290        }
7291    
7292        getEditLog().logSync();
7293      }
7294    
7295      public void removeCachePool(String cachePoolName) throws IOException {
7296        checkOperation(OperationCategory.WRITE);
7297        final FSPermissionChecker pc =
7298            isPermissionEnabled ? getPermissionChecker() : null;
7299        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7300        if (cacheEntry != null && cacheEntry.isSuccess()) {
7301          return; // Return previous response
7302        }
7303        writeLock();
7304        boolean success = false;
7305        try {
7306          checkOperation(OperationCategory.WRITE);
7307          if (isInSafeMode()) {
7308            throw new SafeModeException(
7309                "Cannot remove cache pool " + cachePoolName, safeMode);
7310          }
7311          if (pc != null) {
7312            pc.checkSuperuserPrivilege();
7313          }
7314          cacheManager.removeCachePool(cachePoolName);
7315          getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null);
7316          success = true;
7317        } finally {
7318          writeUnlock();
7319          if (isAuditEnabled() && isExternalInvocation()) {
7320            logAuditEvent(success, "removeCachePool", cachePoolName, null, null);
7321          }
7322          RetryCache.setState(cacheEntry, success);
7323        }
7324        
7325        getEditLog().logSync();
7326      }
7327    
7328      public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
7329          throws IOException {
7330        final FSPermissionChecker pc =
7331            isPermissionEnabled ? getPermissionChecker() : null;
7332        BatchedListEntries<CachePoolEntry> results;
7333        checkOperation(OperationCategory.READ);
7334        boolean success = false;
7335        cacheManager.waitForRescanIfNeeded();
7336        readLock();
7337        try {
7338          checkOperation(OperationCategory.READ);
7339          results = cacheManager.listCachePools(pc, prevKey);
7340          success = true;
7341        } finally {
7342          readUnlock();
7343          if (isAuditEnabled() && isExternalInvocation()) {
7344            logAuditEvent(success, "listCachePools", null, null, null);
7345          }
7346        }
7347        return results;
7348      }
7349    
7350      /**
7351       * Default AuditLogger implementation; used when no access logger is
7352       * defined in the config file. It can also be explicitly listed in the
7353       * config file.
7354       */
7355      private static class DefaultAuditLogger extends HdfsAuditLogger {
7356    
7357        private boolean logTokenTrackingId;
7358    
7359        @Override
7360        public void initialize(Configuration conf) {
7361          logTokenTrackingId = conf.getBoolean(
7362              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
7363              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
7364        }
7365    
7366        @Override
7367        public void logAuditEvent(boolean succeeded, String userName,
7368            InetAddress addr, String cmd, String src, String dst,
7369            FileStatus status, UserGroupInformation ugi,
7370            DelegationTokenSecretManager dtSecretManager) {
7371          if (auditLog.isInfoEnabled()) {
7372            final StringBuilder sb = auditBuffer.get();
7373            sb.setLength(0);
7374            sb.append("allowed=").append(succeeded).append("\t");
7375            sb.append("ugi=").append(userName).append("\t");
7376            sb.append("ip=").append(addr).append("\t");
7377            sb.append("cmd=").append(cmd).append("\t");
7378            sb.append("src=").append(src).append("\t");
7379            sb.append("dst=").append(dst).append("\t");
7380            if (null == status) {
7381              sb.append("perm=null");
7382            } else {
7383              sb.append("perm=");
7384              sb.append(status.getOwner()).append(":");
7385              sb.append(status.getGroup()).append(":");
7386              sb.append(status.getPermission());
7387            }
7388            if (logTokenTrackingId) {
7389              sb.append("\t").append("trackingId=");
7390              String trackingId = null;
7391              if (ugi != null && dtSecretManager != null
7392                  && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
7393                for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
7394                  if (tid instanceof DelegationTokenIdentifier) {
7395                    DelegationTokenIdentifier dtid =
7396                        (DelegationTokenIdentifier)tid;
7397                    trackingId = dtSecretManager.getTokenTrackingId(dtid);
7398                    break;
7399                  }
7400                }
7401              }
7402              sb.append(trackingId);
7403            }
7404            logAuditMessage(sb.toString());
7405          }
7406        }
7407    
7408        public void logAuditMessage(String message) {
7409          auditLog.info(message);
7410        }
7411      }
7412    
7413      private static void enableAsyncAuditLog() {
7414        if (!(auditLog instanceof Log4JLogger)) {
7415          LOG.warn("Log4j is required to enable async auditlog");
7416          return;
7417        }
7418        Logger logger = ((Log4JLogger)auditLog).getLogger();
7419        @SuppressWarnings("unchecked")
7420        List<Appender> appenders = Collections.list(logger.getAllAppenders());
7421        // failsafe against trying to async it more than once
7422        if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
7423          AsyncAppender asyncAppender = new AsyncAppender();
7424          // change logger to have an async appender containing all the
7425          // previously configured appenders
7426          for (Appender appender : appenders) {
7427            logger.removeAppender(appender);
7428            asyncAppender.addAppender(appender);
7429          }
7430          logger.addAppender(asyncAppender);        
7431        }
7432      }
7433    }
7434