001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
021    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
022    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
023    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
024    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
025    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT;
026    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
027    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
028    
029    import java.io.DataInput;
030    import java.io.IOException;
031    import java.util.ArrayList;
032    import java.util.Collection;
033    import java.util.Collections;
034    import java.util.Date;
035    import java.util.EnumSet;
036    import java.util.Iterator;
037    import java.util.LinkedList;
038    import java.util.List;
039    import java.util.Map.Entry;
040    import java.util.SortedMap;
041    import java.util.TreeMap;
042    import java.util.concurrent.locks.ReentrantLock;
043    
044    import org.apache.commons.io.IOUtils;
045    import org.apache.commons.logging.Log;
046    import org.apache.commons.logging.LogFactory;
047    import org.apache.hadoop.classification.InterfaceAudience;
048    import org.apache.hadoop.conf.Configuration;
049    import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
050    import org.apache.hadoop.fs.CacheFlag;
051    import org.apache.hadoop.fs.InvalidRequestException;
052    import org.apache.hadoop.fs.Path;
053    import org.apache.hadoop.fs.UnresolvedLinkException;
054    import org.apache.hadoop.fs.permission.FsAction;
055    import org.apache.hadoop.fs.permission.FsPermission;
056    import org.apache.hadoop.hdfs.DFSUtil;
057    import org.apache.hadoop.hdfs.protocol.CacheDirective;
058    import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
059    import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
060    import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
061    import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
062    import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
063    import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
064    import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
065    import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
066    import org.apache.hadoop.hdfs.protocol.DatanodeID;
067    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
068    import org.apache.hadoop.hdfs.protocolPB.PBHelper;
069    import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
070    import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
071    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
072    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList;
073    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
074    import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
075    import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
076    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
077    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
078    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
079    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
080    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
081    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
082    import org.apache.hadoop.hdfs.util.ReadOnlyList;
083    import org.apache.hadoop.security.AccessControlException;
084    import org.apache.hadoop.util.GSet;
085    import org.apache.hadoop.util.LightWeightGSet;
086    import org.apache.hadoop.util.Time;
087    
088    import com.google.common.annotations.VisibleForTesting;
089    import com.google.common.collect.Lists;
090    
091    /**
092     * The Cache Manager handles caching on DataNodes.
093     *
094     * This class is instantiated by the FSNamesystem.
 * It maintains the mapping of cached blocks to datanodes by processing
 * datanode cache reports. Based on these reports and on the addition and
 * removal of caching directives, it schedules caching and uncaching work.
098     */
099    @InterfaceAudience.LimitedPrivate({"HDFS"})
100    public final class CacheManager {
101      public static final Log LOG = LogFactory.getLog(CacheManager.class);
102    
103      private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;
104    
  // TODO: add pending / underCached / scheduled cached blocks stats.
106    
107      /**
108       * The FSNamesystem that contains this CacheManager.
109       */
110      private final FSNamesystem namesystem;
111    
112      /**
113       * The BlockManager associated with the FSN that owns this CacheManager.
114       */
115      private final BlockManager blockManager;
116    
117      /**
118       * Cache directives, sorted by ID.
119       *
120       * listCacheDirectives relies on the ordering of elements in this map
121       * to track what has already been listed by the client.
122       */
123      private final TreeMap<Long, CacheDirective> directivesById =
124          new TreeMap<Long, CacheDirective>();
125    
126      /**
127       * The directive ID to use for a new directive.  IDs always increase, and are
128       * never reused.
129       */
130      private long nextDirectiveId;
131    
132      /**
   * Cache directives, sorted by path.
134       */
135      private final TreeMap<String, List<CacheDirective>> directivesByPath =
136          new TreeMap<String, List<CacheDirective>>();
137    
138      /**
139       * Cache pools, sorted by name.
140       */
141      private final TreeMap<String, CachePool> cachePools =
142          new TreeMap<String, CachePool>();
143    
144      /**
145       * Maximum number of cache pools to list in one operation.
146       */
147      private final int maxListCachePoolsResponses;
148    
149      /**
   * Maximum number of cache directives to list in one operation.
151       */
152      private final int maxListCacheDirectivesNumResponses;
153    
154      /**
155       * Interval between scans in milliseconds.
156       */
157      private final long scanIntervalMs;
158    
159      /**
160       * All cached blocks.
161       */
162      private final GSet<CachedBlock, CachedBlock> cachedBlocks;
163    
164      /**
165       * Lock which protects the CacheReplicationMonitor.
166       */
167      private final ReentrantLock crmLock = new ReentrantLock();
168    
169      private final SerializerCompat serializerCompat = new SerializerCompat();
170    
171      /**
172       * The CacheReplicationMonitor.
173       */
174      private CacheReplicationMonitor monitor;
175    
176      public static final class PersistState {
177        public final CacheManagerSection section;
178        public final List<CachePoolInfoProto> pools;
179        public final List<CacheDirectiveInfoProto> directives;
180    
181        public PersistState(CacheManagerSection section,
182            List<CachePoolInfoProto> pools, List<CacheDirectiveInfoProto> directives) {
183          this.section = section;
184          this.pools = pools;
185          this.directives = directives;
186        }
187      }
188    
189      CacheManager(FSNamesystem namesystem, Configuration conf,
190          BlockManager blockManager) {
191        this.namesystem = namesystem;
192        this.blockManager = blockManager;
193        this.nextDirectiveId = 1;
194        this.maxListCachePoolsResponses = conf.getInt(
195            DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES,
196            DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT);
197        this.maxListCacheDirectivesNumResponses = conf.getInt(
198            DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES,
199            DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT);
200        scanIntervalMs = conf.getLong(
201            DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
202            DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
203        float cachedBlocksPercent = conf.getFloat(
204              DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
205              DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
206        if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
207          LOG.info("Using minimum value " + MIN_CACHED_BLOCKS_PERCENT +
208            " for " + DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
209          cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
210        }
211        this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
212              LightWeightGSet.computeCapacity(cachedBlocksPercent,
213                  "cachedBlocks"));
214    
215      }
216    
217      /**
   * Resets all tracked directives and pools. Called during 2NN checkpointing
   * to reset FSNamesystem state. See {@link FSNamesystem#clear()}.
220       */
221      void clear() {
222        directivesById.clear();
223        directivesByPath.clear();
224        cachePools.clear();
225        nextDirectiveId = 1;
226      }
227    
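  /**
   * Starts the CacheReplicationMonitor thread if one is not already running.
   * Access to the monitor reference is guarded by {@link #crmLock}.
   */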
228      public void startMonitorThread() {
229        crmLock.lock();
230        try {
231          if (this.monitor == null) {
232            this.monitor = new CacheReplicationMonitor(namesystem, this,
233                scanIntervalMs, crmLock);
234            this.monitor.start();
235          }
236        } finally {
237          crmLock.unlock();
238        }
239      }
240    
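  /**
   * Stops the CacheReplicationMonitor, if one is running, by closing it and
   * clearing the reference to it.
   */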
241      public void stopMonitorThread() {
242        crmLock.lock();
243        try {
244          if (this.monitor != null) {
245            CacheReplicationMonitor prevMonitor = this.monitor;
246            this.monitor = null;
247            IOUtils.closeQuietly(prevMonitor);
248          }
249        } finally {
250          crmLock.unlock();
251        }
252      }
253    
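  /**
   * Resets the statistics of every cache directive. The FSNamesystem write
   * lock must be held.
   */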
254      public void clearDirectiveStats() {
255        assert namesystem.hasWriteLock();
256        for (CacheDirective directive : directivesById.values()) {
257          directive.resetStatistics();
258        }
259      }
260    
261      /**
262       * @return Unmodifiable view of the collection of CachePools.
263       */
264      public Collection<CachePool> getCachePools() {
265        assert namesystem.hasReadLock();
266        return Collections.unmodifiableCollection(cachePools.values());
267      }
268    
269      /**
270       * @return Unmodifiable view of the collection of CacheDirectives.
271       */
272      public Collection<CacheDirective> getCacheDirectives() {
273        assert namesystem.hasReadLock();
274        return Collections.unmodifiableCollection(directivesById.values());
275      }
276      
277      @VisibleForTesting
278      public GSet<CachedBlock, CachedBlock> getCachedBlocks() {
279        assert namesystem.hasReadLock();
280        return cachedBlocks;
281      }
282    
283      private long getNextDirectiveId() throws IOException {
284        assert namesystem.hasWriteLock();
285        if (nextDirectiveId >= Long.MAX_VALUE - 1) {
286          throw new IOException("No more available IDs.");
287        }
288        return nextDirectiveId++;
289      }
290    
291      // Helper getter / validation methods
292    
293      private static void checkWritePermission(FSPermissionChecker pc,
294          CachePool pool) throws AccessControlException {
    if (pc != null) {
296          pc.checkPermission(pool, FsAction.WRITE);
297        }
298      }
299    
300      private static String validatePoolName(CacheDirectiveInfo directive)
301          throws InvalidRequestException {
302        String pool = directive.getPool();
303        if (pool == null) {
304          throw new InvalidRequestException("No pool specified.");
305        }
306        if (pool.isEmpty()) {
307          throw new InvalidRequestException("Invalid empty pool name.");
308        }
309        return pool;
310      }
311    
312      private static String validatePath(CacheDirectiveInfo directive)
313          throws InvalidRequestException {
314        if (directive.getPath() == null) {
315          throw new InvalidRequestException("No path specified.");
316        }
317        String path = directive.getPath().toUri().getPath();
318        if (!DFSUtil.isValidName(path)) {
319          throw new InvalidRequestException("Invalid path '" + path + "'.");
320        }
321        return path;
322      }
323    
324      private static short validateReplication(CacheDirectiveInfo directive,
325          short defaultValue) throws InvalidRequestException {
326        short repl = (directive.getReplication() != null)
327            ? directive.getReplication() : defaultValue;
328        if (repl <= 0) {
329          throw new InvalidRequestException("Invalid replication factor " + repl
330              + " <= 0");
331        }
332        return repl;
333      }
334    
335      /**
336       * Calculates the absolute expiry time of the directive from the
337       * {@link CacheDirectiveInfo.Expiration}. This converts a relative Expiration
338       * into an absolute time based on the local clock.
339       * 
340       * @param info to validate.
341       * @param maxRelativeExpiryTime of the info's pool.
342       * @return the expiration time, or the pool's max absolute expiration if the
343       *         info's expiration was not set.
344       * @throws InvalidRequestException if the info's Expiration is invalid.
345       */
346      private static long validateExpiryTime(CacheDirectiveInfo info,
347          long maxRelativeExpiryTime) throws InvalidRequestException {
348        if (LOG.isTraceEnabled()) {
349          LOG.trace("Validating directive " + info
350              + " pool maxRelativeExpiryTime " + maxRelativeExpiryTime);
351        }
352        final long now = new Date().getTime();
353        final long maxAbsoluteExpiryTime = now + maxRelativeExpiryTime;
354        if (info == null || info.getExpiration() == null) {
355          return maxAbsoluteExpiryTime;
356        }
357        Expiration expiry = info.getExpiration();
    if (expiry.getMillis() < 0L) {
359          throw new InvalidRequestException("Cannot set a negative expiration: "
360              + expiry.getMillis());
361        }
362        long relExpiryTime, absExpiryTime;
363        if (expiry.isRelative()) {
364          relExpiryTime = expiry.getMillis();
365          absExpiryTime = now + relExpiryTime;
366        } else {
367          absExpiryTime = expiry.getMillis();
368          relExpiryTime = absExpiryTime - now;
369        }
370        // Need to cap the expiry so we don't overflow a long when doing math
371        if (relExpiryTime > Expiration.MAX_RELATIVE_EXPIRY_MS) {
372          throw new InvalidRequestException("Expiration "
373              + expiry.toString() + " is too far in the future!");
374        }
375        // Fail if the requested expiry is greater than the max
376        if (relExpiryTime > maxRelativeExpiryTime) {
377          throw new InvalidRequestException("Expiration " + expiry.toString()
378              + " exceeds the max relative expiration time of "
379              + maxRelativeExpiryTime + " ms.");
380        }
381        return absExpiryTime;
382      }
383    
384      /**
385       * Throws an exception if the CachePool does not have enough capacity to
386       * cache the given path at the replication factor.
387       *
388       * @param pool CachePool where the path is being cached
389       * @param path Path that is being cached
390       * @param replication Replication factor of the path
391       * @throws InvalidRequestException if the pool does not have enough capacity
392       */
393      private void checkLimit(CachePool pool, String path,
394          short replication) throws InvalidRequestException {
395        CacheDirectiveStats stats = computeNeeded(path, replication);
396        if (pool.getLimit() == CachePoolInfo.LIMIT_UNLIMITED) {
397          return;
398        }
399        if (pool.getBytesNeeded() + (stats.getBytesNeeded() * replication) > pool
400            .getLimit()) {
401          throw new InvalidRequestException("Caching path " + path + " of size "
402              + stats.getBytesNeeded() / replication + " bytes at replication "
403              + replication + " would exceed pool " + pool.getPoolName()
404              + "'s remaining capacity of "
405              + (pool.getLimit() - pool.getBytesNeeded()) + " bytes.");
406        }
407      }
408    
409      /**
410       * Computes the needed number of bytes and files for a path.
411       * @return CacheDirectiveStats describing the needed stats for this path
412       */
413      private CacheDirectiveStats computeNeeded(String path, short replication) {
414        FSDirectory fsDir = namesystem.getFSDirectory();
415        INode node;
416        long requestedBytes = 0;
417        long requestedFiles = 0;
418        CacheDirectiveStats.Builder builder = new CacheDirectiveStats.Builder();
419        try {
420          node = fsDir.getINode(path);
421        } catch (UnresolvedLinkException e) {
422          // We don't cache through symlinks
423          return builder.build();
424        }
425        if (node == null) {
426          return builder.build();
427        }
428        if (node.isFile()) {
429          requestedFiles = 1;
430          INodeFile file = node.asFile();
431          requestedBytes = file.computeFileSize();
432        } else if (node.isDirectory()) {
433          INodeDirectory dir = node.asDirectory();
434          ReadOnlyList<INode> children = dir
435              .getChildrenList(Snapshot.CURRENT_STATE_ID);
436          requestedFiles = children.size();
437          for (INode child : children) {
438            if (child.isFile()) {
439              requestedBytes += child.asFile().computeFileSize();
440            }
441          }
442        }
443        return new CacheDirectiveStats.Builder()
444            .setBytesNeeded(requestedBytes)
445            .setFilesCached(requestedFiles)
446            .build();
447      }
448    
449      /**
450       * Get a CacheDirective by ID, validating the ID and that the directive
451       * exists.
452       */
453      private CacheDirective getById(long id) throws InvalidRequestException {
454        // Check for invalid IDs.
455        if (id <= 0) {
456          throw new InvalidRequestException("Invalid negative ID.");
457        }
458        // Find the directive.
459        CacheDirective directive = directivesById.get(id);
460        if (directive == null) {
461          throw new InvalidRequestException("No directive with ID " + id
462              + " found.");
463        }
464        return directive;
465      }
466    
467      /**
468       * Get a CachePool by name, validating that it exists.
469       */
470      private CachePool getCachePool(String poolName)
471          throws InvalidRequestException {
472        CachePool pool = cachePools.get(poolName);
473        if (pool == null) {
474          throw new InvalidRequestException("Unknown pool " + poolName);
475        }
476        return pool;
477      }
478    
479      // RPC handlers
480    
481      private void addInternal(CacheDirective directive, CachePool pool) {
482        boolean addedDirective = pool.getDirectiveList().add(directive);
483        assert addedDirective;
484        directivesById.put(directive.getId(), directive);
485        String path = directive.getPath();
486        List<CacheDirective> directives = directivesByPath.get(path);
487        if (directives == null) {
488          directives = new ArrayList<CacheDirective>(1);
489          directivesByPath.put(path, directives);
490        }
491        directives.add(directive);
492        // Fix up pool stats
493        CacheDirectiveStats stats =
494            computeNeeded(directive.getPath(), directive.getReplication());
495        directive.addBytesNeeded(stats.getBytesNeeded());
496        directive.addFilesNeeded(directive.getFilesNeeded());
497    
498        setNeedsRescan();
499      }
500    
501      /**
502       * Adds a directive, skipping most error checking. This should only be called
503       * internally in special scenarios like edit log replay.
504       */
505      CacheDirectiveInfo addDirectiveFromEditLog(CacheDirectiveInfo directive)
506          throws InvalidRequestException {
507        long id = directive.getId();
508        CacheDirective entry = new CacheDirective(directive);
509        CachePool pool = cachePools.get(directive.getPool());
510        addInternal(entry, pool);
511        if (nextDirectiveId <= id) {
512          nextDirectiveId = id + 1;
513        }
514        return entry.toInfo();
515      }
516    
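  /**
   * Adds a new cache directive. The pool, path, replication, and expiration
   * are validated, write permission on the pool is checked, and (unless
   * {@link CacheFlag#FORCE} is set) the pool's capacity limit is enforced.
   *
   * <p>Illustrative sketch of building the {@code info} argument; the path
   * and pool name below are placeholders, not defaults:
   * <pre>{@code
   * CacheDirectiveInfo info = new CacheDirectiveInfo.Builder()
   *     .setPath(new Path("/some/cached/path"))
   *     .setPool("somePool")
   *     .setReplication((short) 1)
   *     .build();
   * }</pre>
   *
   * @param info the directive to add; unset replication defaults to 1 and an
   *          unset expiration defaults to the pool's maximum relative expiry
   * @param pc permission checker, or null to skip permission checks
   * @param flags if {@link CacheFlag#FORCE} is present, skip the pool limit check
   * @return info for the newly created directive
   * @throws IOException if validation or the permission check fails
   */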
517      public CacheDirectiveInfo addDirective(
518          CacheDirectiveInfo info, FSPermissionChecker pc, EnumSet<CacheFlag> flags)
519          throws IOException {
520        assert namesystem.hasWriteLock();
521        CacheDirective directive;
522        try {
523          CachePool pool = getCachePool(validatePoolName(info));
524          checkWritePermission(pc, pool);
525          String path = validatePath(info);
526          short replication = validateReplication(info, (short)1);
527          long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
528          // Do quota validation if required
529          if (!flags.contains(CacheFlag.FORCE)) {
530            checkLimit(pool, path, replication);
531          }
532          // All validation passed
533          // Add a new entry with the next available ID.
534          long id = getNextDirectiveId();
535          directive = new CacheDirective(id, path, replication, expiryTime);
536          addInternal(directive, pool);
537        } catch (IOException e) {
538          LOG.warn("addDirective of " + info + " failed: ", e);
539          throw e;
540        }
541        LOG.info("addDirective of " + info + " successful.");
542        return directive.toInfo();
543      }
544    
545      /**
546       * Factory method that makes a new CacheDirectiveInfo by applying fields in a
547       * CacheDirectiveInfo to an existing CacheDirective.
548       * 
549       * @param info with some or all fields set.
550       * @param defaults directive providing default values for unset fields in
551       *          info.
552       * 
553       * @return new CacheDirectiveInfo of the info applied to the defaults.
554       */
555      private static CacheDirectiveInfo createFromInfoAndDefaults(
556          CacheDirectiveInfo info, CacheDirective defaults) {
557        // Initialize the builder with the default values
558        CacheDirectiveInfo.Builder builder =
559            new CacheDirectiveInfo.Builder(defaults.toInfo());
560        // Replace default with new value if present
561        if (info.getPath() != null) {
562          builder.setPath(info.getPath());
563        }
564        if (info.getReplication() != null) {
565          builder.setReplication(info.getReplication());
566        }
567        if (info.getPool() != null) {
568          builder.setPool(info.getPool());
569        }
570        if (info.getExpiration() != null) {
571          builder.setExpiration(info.getExpiration());
572        }
573        return builder.build();
574      }
575    
576      /**
577       * Modifies a directive, skipping most error checking. This is for careful
578       * internal use only. modifyDirective can be non-deterministic since its error
579       * checking depends on current system time, which poses a problem for edit log
580       * replay.
581       */
582      void modifyDirectiveFromEditLog(CacheDirectiveInfo info)
583          throws InvalidRequestException {
584        // Check for invalid IDs.
585        Long id = info.getId();
586        if (id == null) {
587          throw new InvalidRequestException("Must supply an ID.");
588        }
589        CacheDirective prevEntry = getById(id);
590        CacheDirectiveInfo newInfo = createFromInfoAndDefaults(info, prevEntry);
591        removeInternal(prevEntry);
592        addInternal(new CacheDirective(newInfo), getCachePool(newInfo.getPool()));
593      }
594    
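  /**
   * Modifies an existing cache directive. Fields that are unset in
   * {@code info} keep their current values; the merged result is validated
   * against the destination pool before the old directive is replaced.
   *
   * @param info the changes to apply; must include the directive ID
   * @param pc permission checker, or null to skip permission checks
   * @param flags if {@link CacheFlag#FORCE} is present, skip the pool limit
   *          check when moving the directive to a different pool
   * @throws IOException if the directive does not exist or validation fails
   */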
595      public void modifyDirective(CacheDirectiveInfo info,
596          FSPermissionChecker pc, EnumSet<CacheFlag> flags) throws IOException {
597        assert namesystem.hasWriteLock();
598        String idString =
599            (info.getId() == null) ?
600                "(null)" : info.getId().toString();
601        try {
602          // Check for invalid IDs.
603          Long id = info.getId();
604          if (id == null) {
605            throw new InvalidRequestException("Must supply an ID.");
606          }
607          CacheDirective prevEntry = getById(id);
608          checkWritePermission(pc, prevEntry.getPool());
609    
610          // Fill in defaults
611          CacheDirectiveInfo infoWithDefaults =
612              createFromInfoAndDefaults(info, prevEntry);
613          CacheDirectiveInfo.Builder builder =
614              new CacheDirectiveInfo.Builder(infoWithDefaults);
615    
616          // Do validation
617          validatePath(infoWithDefaults);
618          validateReplication(infoWithDefaults, (short)-1);
      // Only re-check the pool limit and permissions when the directive is
      // moving to a different pool; this avoids rejecting a modify of a
      // directive that was already force-added to a pool over its limit.
621          CachePool srcPool = prevEntry.getPool();
622          CachePool destPool = getCachePool(validatePoolName(infoWithDefaults));
623          if (!srcPool.getPoolName().equals(destPool.getPoolName())) {
624            checkWritePermission(pc, destPool);
625            if (!flags.contains(CacheFlag.FORCE)) {
626              checkLimit(destPool, infoWithDefaults.getPath().toUri().getPath(),
627                  infoWithDefaults.getReplication());
628            }
629          }
630          // Verify the expiration against the destination pool
631          validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
632    
633          // Indicate changes to the CRM
634          setNeedsRescan();
635    
636          // Validation passed
637          removeInternal(prevEntry);
638          addInternal(new CacheDirective(builder.build()), destPool);
639        } catch (IOException e) {
640          LOG.warn("modifyDirective of " + idString + " failed: ", e);
641          throw e;
642        }
643        LOG.info("modifyDirective of " + idString + " successfully applied " +
        info + ".");
645      }
646    
647      private void removeInternal(CacheDirective directive)
648          throws InvalidRequestException {
649        assert namesystem.hasWriteLock();
650        // Remove the corresponding entry in directivesByPath.
651        String path = directive.getPath();
652        List<CacheDirective> directives = directivesByPath.get(path);
653        if (directives == null || !directives.remove(directive)) {
654          throw new InvalidRequestException("Failed to locate entry " +
655              directive.getId() + " by path " + directive.getPath());
656        }
657        if (directives.size() == 0) {
658          directivesByPath.remove(path);
659        }
    // Fix up the pool's stats to account for removing this directive.
661        final CachePool pool = directive.getPool();
662        directive.addBytesNeeded(-directive.getBytesNeeded());
663        directive.addFilesNeeded(-directive.getFilesNeeded());
664    
665        directivesById.remove(directive.getId());
666        pool.getDirectiveList().remove(directive);
667        assert directive.getPool() == null;
668    
669        setNeedsRescan();
670      }
671    
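  /**
   * Removes a cache directive by ID after checking write permission on the
   * pool that contains it.
   *
   * @param id the ID of the directive to remove
   * @param pc permission checker, or null to skip permission checks
   * @throws IOException if the ID is invalid or no such directive exists
   */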
672      public void removeDirective(long id, FSPermissionChecker pc)
673          throws IOException {
674        assert namesystem.hasWriteLock();
675        try {
676          CacheDirective directive = getById(id);
677          checkWritePermission(pc, directive.getPool());
678          removeInternal(directive);
679        } catch (IOException e) {
680          LOG.warn("removeDirective of " + id + " failed: ", e);
681          throw e;
682        }
683        LOG.info("removeDirective of " + id + " successful.");
684      }
685    
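  /**
   * Lists cache directives in batches, starting after {@code prevId} and
   * optionally filtered by pool and path. Directives in pools the caller
   * cannot read are silently skipped. Filtering by ID or replication is not
   * supported.
   *
   * @param prevId the ID of the last directive returned in the previous batch
   * @param filter fields to filter the listing on
   * @param pc permission checker, or null to skip permission checks
   * @return a batch of directive entries and a flag indicating whether more
   *         entries remain
   */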
686      public BatchedListEntries<CacheDirectiveEntry> 
687            listCacheDirectives(long prevId,
688                CacheDirectiveInfo filter,
689                FSPermissionChecker pc) throws IOException {
690        assert namesystem.hasReadLock();
691        final int NUM_PRE_ALLOCATED_ENTRIES = 16;
692        String filterPath = null;
693        if (filter.getId() != null) {
694          throw new IOException("Filtering by ID is unsupported.");
695        }
696        if (filter.getPath() != null) {
697          filterPath = validatePath(filter);
698        }
699        if (filter.getReplication() != null) {
700          throw new IOException("Filtering by replication is unsupported.");
701        }
702        ArrayList<CacheDirectiveEntry> replies =
703            new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
704        int numReplies = 0;
705        SortedMap<Long, CacheDirective> tailMap =
706          directivesById.tailMap(prevId + 1);
707        for (Entry<Long, CacheDirective> cur : tailMap.entrySet()) {
708          if (numReplies >= maxListCacheDirectivesNumResponses) {
709            return new BatchedListEntries<CacheDirectiveEntry>(replies, true);
710          }
711          CacheDirective curDirective = cur.getValue();
712          CacheDirectiveInfo info = cur.getValue().toInfo();
713          if (filter.getPool() != null && 
714              !info.getPool().equals(filter.getPool())) {
715            continue;
716          }
717          if (filterPath != null &&
718              !info.getPath().toUri().getPath().equals(filterPath)) {
719            continue;
720          }
721          boolean hasPermission = true;
722          if (pc != null) {
723            try {
724              pc.checkPermission(curDirective.getPool(), FsAction.READ);
725            } catch (AccessControlException e) {
726              hasPermission = false;
727            }
728          }
729          if (hasPermission) {
730            replies.add(new CacheDirectiveEntry(info, cur.getValue().toStats()));
731            numReplies++;
732          }
733        }
734        return new BatchedListEntries<CacheDirectiveEntry>(replies, false);
735      }
736    
737      /**
738       * Create a cache pool.
739       * 
740       * Only the superuser should be able to call this function.
741       *
742       * @param info    The info for the cache pool to create.
743       * @return        Information about the cache pool we created.
744       */
745      public CachePoolInfo addCachePool(CachePoolInfo info)
746          throws IOException {
747        assert namesystem.hasWriteLock();
748        CachePool pool;
749        try {
750          CachePoolInfo.validate(info);
751          String poolName = info.getPoolName();
752          pool = cachePools.get(poolName);
753          if (pool != null) {
754            throw new InvalidRequestException("Cache pool " + poolName
755                + " already exists.");
756          }
757          pool = CachePool.createFromInfoAndDefaults(info);
758          cachePools.put(pool.getPoolName(), pool);
759        } catch (IOException e) {
760          LOG.info("addCachePool of " + info + " failed: ", e);
761          throw e;
762        }
763        LOG.info("addCachePool of " + info + " successful.");
764        return pool.getInfo(true);
765      }
766    
767      /**
768       * Modify a cache pool.
769       * 
770       * Only the superuser should be able to call this function.
771       *
772       * @param info
773       *          The info for the cache pool to modify.
774       */
775      public void modifyCachePool(CachePoolInfo info)
776          throws IOException {
777        assert namesystem.hasWriteLock();
778        StringBuilder bld = new StringBuilder();
779        try {
780          CachePoolInfo.validate(info);
781          String poolName = info.getPoolName();
782          CachePool pool = cachePools.get(poolName);
783          if (pool == null) {
784            throw new InvalidRequestException("Cache pool " + poolName
785                + " does not exist.");
786          }
787          String prefix = "";
788          if (info.getOwnerName() != null) {
789            pool.setOwnerName(info.getOwnerName());
790            bld.append(prefix).
791              append("set owner to ").append(info.getOwnerName());
792            prefix = "; ";
793          }
794          if (info.getGroupName() != null) {
795            pool.setGroupName(info.getGroupName());
796            bld.append(prefix).
797              append("set group to ").append(info.getGroupName());
798            prefix = "; ";
799          }
800          if (info.getMode() != null) {
801            pool.setMode(info.getMode());
802            bld.append(prefix).append("set mode to " + info.getMode());
803            prefix = "; ";
804          }
805          if (info.getLimit() != null) {
806            pool.setLimit(info.getLimit());
807            bld.append(prefix).append("set limit to " + info.getLimit());
808            prefix = "; ";
809            // New limit changes stats, need to set needs refresh
810            setNeedsRescan();
811          }
812          if (info.getMaxRelativeExpiryMs() != null) {
813            final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
814            pool.setMaxRelativeExpiryMs(maxRelativeExpiry);
815            bld.append(prefix).append("set maxRelativeExpiry to "
816                + maxRelativeExpiry);
817            prefix = "; ";
818          }
819          if (prefix.isEmpty()) {
820            bld.append("no changes.");
821          }
822        } catch (IOException e) {
823          LOG.info("modifyCachePool of " + info + " failed: ", e);
824          throw e;
825        }
826        LOG.info("modifyCachePool of " + info.getPoolName() + " successful; "
827            + bld.toString());
828      }
829    
830      /**
831       * Remove a cache pool.
832       * 
833       * Only the superuser should be able to call this function.
834       *
835       * @param poolName
836       *          The name for the cache pool to remove.
837       */
838      public void removeCachePool(String poolName)
839          throws IOException {
840        assert namesystem.hasWriteLock();
841        try {
842          CachePoolInfo.validateName(poolName);
843          CachePool pool = cachePools.remove(poolName);
844          if (pool == null) {
845            throw new InvalidRequestException(
846                "Cannot remove non-existent cache pool " + poolName);
847          }
848          // Remove all directives in this pool.
849          Iterator<CacheDirective> iter = pool.getDirectiveList().iterator();
850          while (iter.hasNext()) {
851            CacheDirective directive = iter.next();
852            directivesByPath.remove(directive.getPath());
853            directivesById.remove(directive.getId());
854            iter.remove();
855          }
856          setNeedsRescan();
857        } catch (IOException e) {
858          LOG.info("removeCachePool of " + poolName + " failed: ", e);
859          throw e;
860        }
861        LOG.info("removeCachePool of " + poolName + " successful.");
862      }
863    
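  /**
   * Lists cache pools in batches, starting after {@code prevKey}.
   *
   * @param pc permission checker passed to each pool when building its entry
   * @param prevKey the name of the last pool returned in the previous batch
   * @return a batch of cache pool entries and a flag indicating whether more
   *         entries remain
   */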
864      public BatchedListEntries<CachePoolEntry>
865          listCachePools(FSPermissionChecker pc, String prevKey) {
866        assert namesystem.hasReadLock();
867        final int NUM_PRE_ALLOCATED_ENTRIES = 16;
868        ArrayList<CachePoolEntry> results = 
869            new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
870        SortedMap<String, CachePool> tailMap = cachePools.tailMap(prevKey, false);
871        int numListed = 0;
872        for (Entry<String, CachePool> cur : tailMap.entrySet()) {
873          if (numListed++ >= maxListCachePoolsResponses) {
874            return new BatchedListEntries<CachePoolEntry>(results, true);
875          }
876          results.add(cur.getValue().getEntry(pc));
877        }
878        return new BatchedListEntries<CachePoolEntry>(results, false);
879      }
880    
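  /**
   * Adds the datanodes that currently cache the given block to the block's
   * cached locations, if the block is tracked in the cachedBlocks map.
   */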
881      public void setCachedLocations(LocatedBlock block) {
882        CachedBlock cachedBlock =
883            new CachedBlock(block.getBlock().getBlockId(),
884                (short)0, false);
885        cachedBlock = cachedBlocks.get(cachedBlock);
886        if (cachedBlock == null) {
887          return;
888        }
889        List<DatanodeDescriptor> datanodes = cachedBlock.getDatanodes(Type.CACHED);
890        for (DatanodeDescriptor datanode : datanodes) {
891          block.addCachedLoc(datanode);
892        }
893      }
894    
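  /**
   * Processes a cache report from a datanode, updating the set of blocks the
   * datanode is known to have cached. Takes the FSNamesystem write lock and
   * records the processing time in NameNode metrics.
   *
   * @param datanodeID the datanode that sent the report
   * @param blockIds the IDs of the blocks the datanode reports as cached
   * @throws IOException if the datanode is dead or unregistered
   */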
895      public final void processCacheReport(final DatanodeID datanodeID,
896          final List<Long> blockIds) throws IOException {
897        namesystem.writeLock();
898        final long startTime = Time.monotonicNow();
899        final long endTime;
900        try {
901          final DatanodeDescriptor datanode = 
902              blockManager.getDatanodeManager().getDatanode(datanodeID);
903          if (datanode == null || !datanode.isAlive) {
904            throw new IOException(
905                "processCacheReport from dead or unregistered datanode: " +
906                datanode);
907          }
908          processCacheReportImpl(datanode, blockIds);
909        } finally {
910          endTime = Time.monotonicNow();
911          namesystem.writeUnlock();
912        }
913    
    // Log the cache report processing stats from the NameNode's perspective.
915        final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
916        if (metrics != null) {
917          metrics.addCacheBlockReport((int) (endTime - startTime));
918        }
919        if (LOG.isDebugEnabled()) {
920          LOG.debug("Processed cache report from "
921              + datanodeID + ", blocks: " + blockIds.size()
922              + ", processing time: " + (endTime - startTime) + " msecs");
923        }
924      }
925    
926      private void processCacheReportImpl(final DatanodeDescriptor datanode,
927          final List<Long> blockIds) {
928        CachedBlocksList cached = datanode.getCached();
929        cached.clear();
930        CachedBlocksList cachedList = datanode.getCached();
931        CachedBlocksList pendingCachedList = datanode.getPendingCached();
932        for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
933          long blockId = iter.next();
934          CachedBlock cachedBlock =
935              new CachedBlock(blockId, (short)0, false);
936          CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
937          // Add the block ID from the cache report to the cachedBlocks map
938          // if it's not already there.
939          if (prevCachedBlock != null) {
940            cachedBlock = prevCachedBlock;
941          } else {
942            cachedBlocks.put(cachedBlock);
943          }
944          // Add the block to the datanode's implicit cached block list
945          // if it's not already there.  Similarly, remove it from the pending
946          // cached block list if it exists there.
947          if (!cachedBlock.isPresent(cachedList)) {
948            cachedList.add(cachedBlock);
949          }
950          if (cachedBlock.isPresent(pendingCachedList)) {
951            pendingCachedList.remove(cachedBlock);
952          }
953        }
954      }
955    
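  /**
   * Captures the current cache manager state (next directive ID, cache pools,
   * and cache directives) as protobuf messages for saving to an FSImage.
   *
   * @return the state to persist
   */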
956      public PersistState saveState() throws IOException {
957        ArrayList<CachePoolInfoProto> pools = Lists
958            .newArrayListWithCapacity(cachePools.size());
959        ArrayList<CacheDirectiveInfoProto> directives = Lists
960            .newArrayListWithCapacity(directivesById.size());
961    
962        for (CachePool pool : cachePools.values()) {
963          CachePoolInfo p = pool.getInfo(true);
964          CachePoolInfoProto.Builder b = CachePoolInfoProto.newBuilder()
965              .setPoolName(p.getPoolName());
966    
967          if (p.getOwnerName() != null)
968            b.setOwnerName(p.getOwnerName());
969    
970          if (p.getGroupName() != null)
971            b.setGroupName(p.getGroupName());
972    
973          if (p.getMode() != null)
974            b.setMode(p.getMode().toShort());
975    
976          if (p.getLimit() != null)
977            b.setLimit(p.getLimit());
978    
979          pools.add(b.build());
980        }
981    
982        for (CacheDirective directive : directivesById.values()) {
983          CacheDirectiveInfo info = directive.toInfo();
984          CacheDirectiveInfoProto.Builder b = CacheDirectiveInfoProto.newBuilder()
985              .setId(info.getId());
986    
987          if (info.getPath() != null) {
988            b.setPath(info.getPath().toUri().getPath());
989          }
990    
991          if (info.getReplication() != null) {
992            b.setReplication(info.getReplication());
993          }
994    
995          if (info.getPool() != null) {
996            b.setPool(info.getPool());
997          }
998    
999          Expiration expiry = info.getExpiration();
1000          if (expiry != null) {
1001            assert (!expiry.isRelative());
1002            b.setExpiration(PBHelper.convert(expiry));
1003          }
1004    
1005          directives.add(b.build());
1006        }
1007        CacheManagerSection s = CacheManagerSection.newBuilder()
1008            .setNextDirectiveId(nextDirectiveId).setNumPools(pools.size())
1009            .setNumDirectives(directives.size()).build();
1010    
1011        return new PersistState(s, pools, directives);
1012      }
1013    
1014      /**
1015       * Reloads CacheManager state from the passed DataInput. Used during namenode
1016       * startup to restore CacheManager state from an FSImage.
1017       * @param in DataInput from which to restore state
1018       * @throws IOException
1019       */
1020      public void loadStateCompat(DataInput in) throws IOException {
1021        serializerCompat.load(in);
1022      }
1023    
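  /**
   * Restores cache manager state from a {@link PersistState} previously
   * produced by {@link #saveState()}, re-creating the pools first and then
   * the directives that reference them.
   */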
1024      public void loadState(PersistState s) throws IOException {
1025        nextDirectiveId = s.section.getNextDirectiveId();
1026        for (CachePoolInfoProto p : s.pools) {
1027          CachePoolInfo info = new CachePoolInfo(p.getPoolName());
1028          if (p.hasOwnerName())
1029            info.setOwnerName(p.getOwnerName());
1030    
1031          if (p.hasGroupName())
1032            info.setGroupName(p.getGroupName());
1033    
1034          if (p.hasMode())
1035            info.setMode(new FsPermission((short) p.getMode()));
1036    
1037          if (p.hasLimit())
1038            info.setLimit(p.getLimit());
1039    
1040          addCachePool(info);
1041        }
1042    
1043        for (CacheDirectiveInfoProto p : s.directives) {
1044          // Get pool reference by looking it up in the map
1045          final String poolName = p.getPool();
1046          CacheDirective directive = new CacheDirective(p.getId(), new Path(
1047              p.getPath()).toUri().getPath(), (short) p.getReplication(), p
1048              .getExpiration().getMillis());
1049          addCacheDirective(poolName, directive);
1050        }
1051      }
1052    
1053      private void addCacheDirective(final String poolName,
1054          final CacheDirective directive) throws IOException {
1055        CachePool pool = cachePools.get(poolName);
1056        if (pool == null) {
1057          throw new IOException("Directive refers to pool " + poolName
1058              + ", which does not exist.");
1059        }
1060        boolean addedDirective = pool.getDirectiveList().add(directive);
1061        assert addedDirective;
1062        if (directivesById.put(directive.getId(), directive) != null) {
1063          throw new IOException("A directive with ID " + directive.getId()
1064              + " already exists");
1065        }
1066        List<CacheDirective> directives = directivesByPath.get(directive.getPath());
1067        if (directives == null) {
1068          directives = new LinkedList<CacheDirective>();
1069          directivesByPath.put(directive.getPath(), directives);
1070        }
1071        directives.add(directive);
1072      }
1073    
1074      private final class SerializerCompat {
1075        private void load(DataInput in) throws IOException {
1076          nextDirectiveId = in.readLong();
1077          // pools need to be loaded first since directives point to their parent pool
1078          loadPools(in);
1079          loadDirectives(in);
1080        }
1081    
1082        /**
1083         * Load cache pools from fsimage
1084         */
1085        private void loadPools(DataInput in)
1086            throws IOException {
1087          StartupProgress prog = NameNode.getStartupProgress();
1088          Step step = new Step(StepType.CACHE_POOLS);
1089          prog.beginStep(Phase.LOADING_FSIMAGE, step);
1090          int numberOfPools = in.readInt();
1091          prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfPools);
1092          Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1093          for (int i = 0; i < numberOfPools; i++) {
1094            addCachePool(FSImageSerialization.readCachePoolInfo(in));
1095            counter.increment();
1096          }
1097          prog.endStep(Phase.LOADING_FSIMAGE, step);
1098        }
1099    
1100        /**
1101         * Load cache directives from the fsimage
1102         */
1103        private void loadDirectives(DataInput in) throws IOException {
1104          StartupProgress prog = NameNode.getStartupProgress();
1105          Step step = new Step(StepType.CACHE_ENTRIES);
1106          prog.beginStep(Phase.LOADING_FSIMAGE, step);
1107          int numDirectives = in.readInt();
1108          prog.setTotal(Phase.LOADING_FSIMAGE, step, numDirectives);
1109          Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1110          for (int i = 0; i < numDirectives; i++) {
1111            CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in);
1112            // Get pool reference by looking it up in the map
1113            final String poolName = info.getPool();
1114            CacheDirective directive =
1115                new CacheDirective(info.getId(), info.getPath().toUri().getPath(),
1116                    info.getReplication(), info.getExpiration().getAbsoluteMillis());
1117            addCacheDirective(poolName, directive);
1118            counter.increment();
1119          }
1120          prog.endStep(Phase.LOADING_FSIMAGE, step);
1121        }
1122      }
1123    
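  /**
   * Waits for the CacheReplicationMonitor to complete a rescan if one is
   * needed; a no-op when the monitor is not running.
   */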
1124      public void waitForRescanIfNeeded() {
1125        crmLock.lock();
1126        try {
1127          if (monitor != null) {
1128            monitor.waitForRescanIfNeeded();
1129          }
1130        } finally {
1131          crmLock.unlock();
1132        }
1133      }
1134    
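  /**
   * Signals the CacheReplicationMonitor that cached state has changed and a
   * rescan is needed; a no-op when the monitor is not running.
   */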
1135      private void setNeedsRescan() {
1136        crmLock.lock();
1137        try {
1138          if (monitor != null) {
1139            monitor.setNeedsRescan();
1140          }
1141        } finally {
1142          crmLock.unlock();
1143        }
1144      }
1145    
1146      @VisibleForTesting
1147      public Thread getCacheReplicationMonitor() {
1148        crmLock.lock();
1149        try {
1150          return monitor;
1151        } finally {
1152          crmLock.unlock();
1153        }
1154      }
1155    }