001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
021import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
022import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
023import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
024import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT;
026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
028
029import java.io.DataInput;
030import java.io.DataOutputStream;
031import java.io.IOException;
032import java.util.ArrayList;
033import java.util.Collection;
034import java.util.Collections;
035import java.util.Date;
036import java.util.EnumSet;
037import java.util.Iterator;
038import java.util.LinkedList;
039import java.util.List;
040import java.util.Map.Entry;
041import java.util.SortedMap;
042import java.util.TreeMap;
043import java.util.concurrent.locks.ReentrantLock;
044
045import org.apache.commons.io.IOUtils;
046import org.apache.commons.logging.Log;
047import org.apache.commons.logging.LogFactory;
048import org.apache.hadoop.classification.InterfaceAudience;
049import org.apache.hadoop.conf.Configuration;
050import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
051import org.apache.hadoop.fs.CacheFlag;
052import org.apache.hadoop.fs.InvalidRequestException;
053import org.apache.hadoop.fs.Path;
054import org.apache.hadoop.fs.UnresolvedLinkException;
055import org.apache.hadoop.fs.permission.FsAction;
056import org.apache.hadoop.fs.permission.FsPermission;
057import org.apache.hadoop.hdfs.DFSUtil;
058import org.apache.hadoop.hdfs.protocol.CacheDirective;
059import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
060import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
061import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
062import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
063import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
064import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
065import org.apache.hadoop.hdfs.protocol.DatanodeID;
066import org.apache.hadoop.hdfs.protocol.LocatedBlock;
067import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
068import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
069import org.apache.hadoop.hdfs.protocolPB.PBHelper;
070import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
071import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
072import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
073import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList;
074import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
075import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
076import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
077import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
078import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
079import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
080import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
081import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
082import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
083import org.apache.hadoop.hdfs.util.ReadOnlyList;
084import org.apache.hadoop.security.AccessControlException;
085import org.apache.hadoop.util.GSet;
086import org.apache.hadoop.util.LightWeightGSet;
087import org.apache.hadoop.util.Time;
088
089import com.google.common.annotations.VisibleForTesting;
090import com.google.common.collect.Lists;
091
092/**
093 * The Cache Manager handles caching on DataNodes.
094 *
095 * This class is instantiated by the FSNamesystem.
096 * It maintains the mapping of cached blocks to datanodes via processing
097 * datanode cache reports. Based on these reports and addition and removal of
098 * caching directives, we will schedule caching and uncaching work.
099 */
100@InterfaceAudience.LimitedPrivate({"HDFS"})
101public final class CacheManager {
  /** Logger for this class. */
  public static final Log LOG = LogFactory.getLog(CacheManager.class);

  /**
   * Lower bound for the cached-block map allocation percent. The constructor
   * substitutes this value when the configured percent is smaller.
   */
  private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;

  // TODO: add pending / underCached / schedule cached blocks stats.

  /**
   * The FSNamesystem that contains this CacheManager.
   */
  private final FSNamesystem namesystem;

  /**
   * The BlockManager associated with the FSN that owns this CacheManager.
   */
  private final BlockManager blockManager;

  /**
   * Cache directives, sorted by ID.
   *
   * listCacheDirectives relies on the ordering of elements in this map
   * to track what has already been listed by the client.
   */
  private final TreeMap<Long, CacheDirective> directivesById =
      new TreeMap<Long, CacheDirective>();

  /**
   * The directive ID to use for a new directive.  IDs always increase, and are
   * never reused. Starts at 1 and is reset to 1 by {@link #clear()}.
   */
  private long nextDirectiveId;

  /**
   * Cache directives, sorted by path. Each path maps to the list of
   * directives that target it; a path with no directives has no entry.
   */
  private final TreeMap<String, List<CacheDirective>> directivesByPath =
      new TreeMap<String, List<CacheDirective>>();

  /**
   * Cache pools, sorted by name.
   */
  private final TreeMap<String, CachePool> cachePools =
      new TreeMap<String, CachePool>();

  /**
   * Maximum number of cache pools to list in one operation.
   */
  private final int maxListCachePoolsResponses;

  /**
   * Maximum number of cache directives to list in one operation.
   */
  private final int maxListCacheDirectivesNumResponses;

  /**
   * Interval between scans in milliseconds.
   */
  private final long scanIntervalMs;

  /**
   * All cached blocks.
   */
  private final GSet<CachedBlock, CachedBlock> cachedBlocks;

  /**
   * Lock which protects the CacheReplicationMonitor. The same lock instance
   * is handed to every monitor created by startMonitorThread.
   */
  private final ReentrantLock crmLock = new ReentrantLock();

  // NOTE(review): SerializerCompat is declared outside the visible part of
  // this file; presumably it handles the legacy (non-protobuf) fsimage
  // format -- confirm against its definition.
  private final SerializerCompat serializerCompat = new SerializerCompat();

  /**
   * The CacheReplicationMonitor, or null when no monitor has been started
   * (or it has been stopped).
   */
  private CacheReplicationMonitor monitor;
176
177  public static final class PersistState {
178    public final CacheManagerSection section;
179    public final List<CachePoolInfoProto> pools;
180    public final List<CacheDirectiveInfoProto> directives;
181
182    public PersistState(CacheManagerSection section,
183        List<CachePoolInfoProto> pools, List<CacheDirectiveInfoProto> directives) {
184      this.section = section;
185      this.pools = pools;
186      this.directives = directives;
187    }
188  }
189
190  CacheManager(FSNamesystem namesystem, Configuration conf,
191      BlockManager blockManager) {
192    this.namesystem = namesystem;
193    this.blockManager = blockManager;
194    this.nextDirectiveId = 1;
195    this.maxListCachePoolsResponses = conf.getInt(
196        DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES,
197        DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT);
198    this.maxListCacheDirectivesNumResponses = conf.getInt(
199        DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES,
200        DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT);
201    scanIntervalMs = conf.getLong(
202        DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
203        DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
204    float cachedBlocksPercent = conf.getFloat(
205          DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
206          DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
207    if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
208      LOG.info("Using minimum value " + MIN_CACHED_BLOCKS_PERCENT +
209        " for " + DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
210      cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
211    }
212    this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
213          LightWeightGSet.computeCapacity(cachedBlocksPercent,
214              "cachedBlocks"));
215
216  }
217
218  /**
219   * Resets all tracked directives and pools. Called during 2NN checkpointing to
220   * reset FSNamesystem state. See {@link FSNamesystem#clear()}.
221   */
222  void clear() {
223    directivesById.clear();
224    directivesByPath.clear();
225    cachePools.clear();
226    nextDirectiveId = 1;
227  }
228
229  public void startMonitorThread() {
230    crmLock.lock();
231    try {
232      if (this.monitor == null) {
233        this.monitor = new CacheReplicationMonitor(namesystem, this,
234            scanIntervalMs, crmLock);
235        this.monitor.start();
236      }
237    } finally {
238      crmLock.unlock();
239    }
240  }
241
242  public void stopMonitorThread() {
243    crmLock.lock();
244    try {
245      if (this.monitor != null) {
246        CacheReplicationMonitor prevMonitor = this.monitor;
247        this.monitor = null;
248        IOUtils.closeQuietly(prevMonitor);
249      }
250    } finally {
251      crmLock.unlock();
252    }
253  }
254
255  public void clearDirectiveStats() {
256    assert namesystem.hasWriteLock();
257    for (CacheDirective directive : directivesById.values()) {
258      directive.resetStatistics();
259    }
260  }
261
262  /**
263   * @return Unmodifiable view of the collection of CachePools.
264   */
265  public Collection<CachePool> getCachePools() {
266    assert namesystem.hasReadLock();
267    return Collections.unmodifiableCollection(cachePools.values());
268  }
269
270  /**
271   * @return Unmodifiable view of the collection of CacheDirectives.
272   */
273  public Collection<CacheDirective> getCacheDirectives() {
274    assert namesystem.hasReadLock();
275    return Collections.unmodifiableCollection(directivesById.values());
276  }
277  
278  @VisibleForTesting
279  public GSet<CachedBlock, CachedBlock> getCachedBlocks() {
280    assert namesystem.hasReadLock();
281    return cachedBlocks;
282  }
283
284  private long getNextDirectiveId() throws IOException {
285    assert namesystem.hasWriteLock();
286    if (nextDirectiveId >= Long.MAX_VALUE - 1) {
287      throw new IOException("No more available IDs.");
288    }
289    return nextDirectiveId++;
290  }
291
292  // Helper getter / validation methods
293
294  private static void checkWritePermission(FSPermissionChecker pc,
295      CachePool pool) throws AccessControlException {
296    if ((pc != null)) {
297      pc.checkPermission(pool, FsAction.WRITE);
298    }
299  }
300
301  private static String validatePoolName(CacheDirectiveInfo directive)
302      throws InvalidRequestException {
303    String pool = directive.getPool();
304    if (pool == null) {
305      throw new InvalidRequestException("No pool specified.");
306    }
307    if (pool.isEmpty()) {
308      throw new InvalidRequestException("Invalid empty pool name.");
309    }
310    return pool;
311  }
312
313  private static String validatePath(CacheDirectiveInfo directive)
314      throws InvalidRequestException {
315    if (directive.getPath() == null) {
316      throw new InvalidRequestException("No path specified.");
317    }
318    String path = directive.getPath().toUri().getPath();
319    if (!DFSUtil.isValidName(path)) {
320      throw new InvalidRequestException("Invalid path '" + path + "'.");
321    }
322    return path;
323  }
324
325  private static short validateReplication(CacheDirectiveInfo directive,
326      short defaultValue) throws InvalidRequestException {
327    short repl = (directive.getReplication() != null)
328        ? directive.getReplication() : defaultValue;
329    if (repl <= 0) {
330      throw new InvalidRequestException("Invalid replication factor " + repl
331          + " <= 0");
332    }
333    return repl;
334  }
335
336  /**
337   * Calculates the absolute expiry time of the directive from the
338   * {@link CacheDirectiveInfo.Expiration}. This converts a relative Expiration
339   * into an absolute time based on the local clock.
340   * 
341   * @param info to validate.
342   * @param maxRelativeExpiryTime of the info's pool.
343   * @return the expiration time, or the pool's max absolute expiration if the
344   *         info's expiration was not set.
345   * @throws InvalidRequestException if the info's Expiration is invalid.
346   */
347  private static long validateExpiryTime(CacheDirectiveInfo info,
348      long maxRelativeExpiryTime) throws InvalidRequestException {
349    if (LOG.isTraceEnabled()) {
350      LOG.trace("Validating directive " + info
351          + " pool maxRelativeExpiryTime " + maxRelativeExpiryTime);
352    }
353    final long now = new Date().getTime();
354    final long maxAbsoluteExpiryTime = now + maxRelativeExpiryTime;
355    if (info == null || info.getExpiration() == null) {
356      return maxAbsoluteExpiryTime;
357    }
358    Expiration expiry = info.getExpiration();
359    if (expiry.getMillis() < 0l) {
360      throw new InvalidRequestException("Cannot set a negative expiration: "
361          + expiry.getMillis());
362    }
363    long relExpiryTime, absExpiryTime;
364    if (expiry.isRelative()) {
365      relExpiryTime = expiry.getMillis();
366      absExpiryTime = now + relExpiryTime;
367    } else {
368      absExpiryTime = expiry.getMillis();
369      relExpiryTime = absExpiryTime - now;
370    }
371    // Need to cap the expiry so we don't overflow a long when doing math
372    if (relExpiryTime > Expiration.MAX_RELATIVE_EXPIRY_MS) {
373      throw new InvalidRequestException("Expiration "
374          + expiry.toString() + " is too far in the future!");
375    }
376    // Fail if the requested expiry is greater than the max
377    if (relExpiryTime > maxRelativeExpiryTime) {
378      throw new InvalidRequestException("Expiration " + expiry.toString()
379          + " exceeds the max relative expiration time of "
380          + maxRelativeExpiryTime + " ms.");
381    }
382    return absExpiryTime;
383  }
384
385  /**
386   * Throws an exception if the CachePool does not have enough capacity to
387   * cache the given path at the replication factor.
388   *
389   * @param pool CachePool where the path is being cached
390   * @param path Path that is being cached
391   * @param replication Replication factor of the path
392   * @throws InvalidRequestException if the pool does not have enough capacity
393   */
394  private void checkLimit(CachePool pool, String path,
395      short replication) throws InvalidRequestException {
396    CacheDirectiveStats stats = computeNeeded(path, replication);
397    if (pool.getLimit() == CachePoolInfo.LIMIT_UNLIMITED) {
398      return;
399    }
400    if (pool.getBytesNeeded() + (stats.getBytesNeeded() * replication) > pool
401        .getLimit()) {
402      throw new InvalidRequestException("Caching path " + path + " of size "
403          + stats.getBytesNeeded() / replication + " bytes at replication "
404          + replication + " would exceed pool " + pool.getPoolName()
405          + "'s remaining capacity of "
406          + (pool.getLimit() - pool.getBytesNeeded()) + " bytes.");
407    }
408  }
409
410  /**
411   * Computes the needed number of bytes and files for a path.
412   * @return CacheDirectiveStats describing the needed stats for this path
413   */
414  private CacheDirectiveStats computeNeeded(String path, short replication) {
415    FSDirectory fsDir = namesystem.getFSDirectory();
416    INode node;
417    long requestedBytes = 0;
418    long requestedFiles = 0;
419    CacheDirectiveStats.Builder builder = new CacheDirectiveStats.Builder();
420    try {
421      node = fsDir.getINode(path);
422    } catch (UnresolvedLinkException e) {
423      // We don't cache through symlinks
424      return builder.build();
425    }
426    if (node == null) {
427      return builder.build();
428    }
429    if (node.isFile()) {
430      requestedFiles = 1;
431      INodeFile file = node.asFile();
432      requestedBytes = file.computeFileSize();
433    } else if (node.isDirectory()) {
434      INodeDirectory dir = node.asDirectory();
435      ReadOnlyList<INode> children = dir
436          .getChildrenList(Snapshot.CURRENT_STATE_ID);
437      requestedFiles = children.size();
438      for (INode child : children) {
439        if (child.isFile()) {
440          requestedBytes += child.asFile().computeFileSize();
441        }
442      }
443    }
444    return new CacheDirectiveStats.Builder()
445        .setBytesNeeded(requestedBytes)
446        .setFilesCached(requestedFiles)
447        .build();
448  }
449
450  /**
451   * Get a CacheDirective by ID, validating the ID and that the directive
452   * exists.
453   */
454  private CacheDirective getById(long id) throws InvalidRequestException {
455    // Check for invalid IDs.
456    if (id <= 0) {
457      throw new InvalidRequestException("Invalid negative ID.");
458    }
459    // Find the directive.
460    CacheDirective directive = directivesById.get(id);
461    if (directive == null) {
462      throw new InvalidRequestException("No directive with ID " + id
463          + " found.");
464    }
465    return directive;
466  }
467
468  /**
469   * Get a CachePool by name, validating that it exists.
470   */
471  private CachePool getCachePool(String poolName)
472      throws InvalidRequestException {
473    CachePool pool = cachePools.get(poolName);
474    if (pool == null) {
475      throw new InvalidRequestException("Unknown pool " + poolName);
476    }
477    return pool;
478  }
479
480  // RPC handlers
481
482  private void addInternal(CacheDirective directive, CachePool pool) {
483    boolean addedDirective = pool.getDirectiveList().add(directive);
484    assert addedDirective;
485    directivesById.put(directive.getId(), directive);
486    String path = directive.getPath();
487    List<CacheDirective> directives = directivesByPath.get(path);
488    if (directives == null) {
489      directives = new ArrayList<CacheDirective>(1);
490      directivesByPath.put(path, directives);
491    }
492    directives.add(directive);
493    // Fix up pool stats
494    CacheDirectiveStats stats =
495        computeNeeded(directive.getPath(), directive.getReplication());
496    directive.addBytesNeeded(stats.getBytesNeeded());
497    directive.addFilesNeeded(directive.getFilesNeeded());
498
499    setNeedsRescan();
500  }
501
502  /**
503   * Adds a directive, skipping most error checking. This should only be called
504   * internally in special scenarios like edit log replay.
505   */
506  CacheDirectiveInfo addDirectiveFromEditLog(CacheDirectiveInfo directive)
507      throws InvalidRequestException {
508    long id = directive.getId();
509    CacheDirective entry = new CacheDirective(directive);
510    CachePool pool = cachePools.get(directive.getPool());
511    addInternal(entry, pool);
512    if (nextDirectiveId <= id) {
513      nextDirectiveId = id + 1;
514    }
515    return entry.toInfo();
516  }
517
518  public CacheDirectiveInfo addDirective(
519      CacheDirectiveInfo info, FSPermissionChecker pc, EnumSet<CacheFlag> flags)
520      throws IOException {
521    assert namesystem.hasWriteLock();
522    CacheDirective directive;
523    try {
524      CachePool pool = getCachePool(validatePoolName(info));
525      checkWritePermission(pc, pool);
526      String path = validatePath(info);
527      short replication = validateReplication(info, (short)1);
528      long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
529      // Do quota validation if required
530      if (!flags.contains(CacheFlag.FORCE)) {
531        checkLimit(pool, path, replication);
532      }
533      // All validation passed
534      // Add a new entry with the next available ID.
535      long id = getNextDirectiveId();
536      directive = new CacheDirective(id, path, replication, expiryTime);
537      addInternal(directive, pool);
538    } catch (IOException e) {
539      LOG.warn("addDirective of " + info + " failed: ", e);
540      throw e;
541    }
542    LOG.info("addDirective of " + info + " successful.");
543    return directive.toInfo();
544  }
545
546  /**
547   * Factory method that makes a new CacheDirectiveInfo by applying fields in a
548   * CacheDirectiveInfo to an existing CacheDirective.
549   * 
550   * @param info with some or all fields set.
551   * @param defaults directive providing default values for unset fields in
552   *          info.
553   * 
554   * @return new CacheDirectiveInfo of the info applied to the defaults.
555   */
556  private static CacheDirectiveInfo createFromInfoAndDefaults(
557      CacheDirectiveInfo info, CacheDirective defaults) {
558    // Initialize the builder with the default values
559    CacheDirectiveInfo.Builder builder =
560        new CacheDirectiveInfo.Builder(defaults.toInfo());
561    // Replace default with new value if present
562    if (info.getPath() != null) {
563      builder.setPath(info.getPath());
564    }
565    if (info.getReplication() != null) {
566      builder.setReplication(info.getReplication());
567    }
568    if (info.getPool() != null) {
569      builder.setPool(info.getPool());
570    }
571    if (info.getExpiration() != null) {
572      builder.setExpiration(info.getExpiration());
573    }
574    return builder.build();
575  }
576
577  /**
578   * Modifies a directive, skipping most error checking. This is for careful
579   * internal use only. modifyDirective can be non-deterministic since its error
580   * checking depends on current system time, which poses a problem for edit log
581   * replay.
582   */
583  void modifyDirectiveFromEditLog(CacheDirectiveInfo info)
584      throws InvalidRequestException {
585    // Check for invalid IDs.
586    Long id = info.getId();
587    if (id == null) {
588      throw new InvalidRequestException("Must supply an ID.");
589    }
590    CacheDirective prevEntry = getById(id);
591    CacheDirectiveInfo newInfo = createFromInfoAndDefaults(info, prevEntry);
592    removeInternal(prevEntry);
593    addInternal(new CacheDirective(newInfo), getCachePool(newInfo.getPool()));
594  }
595
596  public void modifyDirective(CacheDirectiveInfo info,
597      FSPermissionChecker pc, EnumSet<CacheFlag> flags) throws IOException {
598    assert namesystem.hasWriteLock();
599    String idString =
600        (info.getId() == null) ?
601            "(null)" : info.getId().toString();
602    try {
603      // Check for invalid IDs.
604      Long id = info.getId();
605      if (id == null) {
606        throw new InvalidRequestException("Must supply an ID.");
607      }
608      CacheDirective prevEntry = getById(id);
609      checkWritePermission(pc, prevEntry.getPool());
610
611      // Fill in defaults
612      CacheDirectiveInfo infoWithDefaults =
613          createFromInfoAndDefaults(info, prevEntry);
614      CacheDirectiveInfo.Builder builder =
615          new CacheDirectiveInfo.Builder(infoWithDefaults);
616
617      // Do validation
618      validatePath(infoWithDefaults);
619      validateReplication(infoWithDefaults, (short)-1);
620      // Need to test the pool being set here to avoid rejecting a modify for a
621      // directive that's already been forced into a pool
622      CachePool srcPool = prevEntry.getPool();
623      CachePool destPool = getCachePool(validatePoolName(infoWithDefaults));
624      if (!srcPool.getPoolName().equals(destPool.getPoolName())) {
625        checkWritePermission(pc, destPool);
626        if (!flags.contains(CacheFlag.FORCE)) {
627          checkLimit(destPool, infoWithDefaults.getPath().toUri().getPath(),
628              infoWithDefaults.getReplication());
629        }
630      }
631      // Verify the expiration against the destination pool
632      validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
633
634      // Indicate changes to the CRM
635      setNeedsRescan();
636
637      // Validation passed
638      removeInternal(prevEntry);
639      addInternal(new CacheDirective(builder.build()), destPool);
640    } catch (IOException e) {
641      LOG.warn("modifyDirective of " + idString + " failed: ", e);
642      throw e;
643    }
644    LOG.info("modifyDirective of " + idString + " successfully applied " +
645        info+ ".");
646  }
647
648  private void removeInternal(CacheDirective directive)
649      throws InvalidRequestException {
650    assert namesystem.hasWriteLock();
651    // Remove the corresponding entry in directivesByPath.
652    String path = directive.getPath();
653    List<CacheDirective> directives = directivesByPath.get(path);
654    if (directives == null || !directives.remove(directive)) {
655      throw new InvalidRequestException("Failed to locate entry " +
656          directive.getId() + " by path " + directive.getPath());
657    }
658    if (directives.size() == 0) {
659      directivesByPath.remove(path);
660    }
661    // Fix up the stats from removing the pool
662    final CachePool pool = directive.getPool();
663    directive.addBytesNeeded(-directive.getBytesNeeded());
664    directive.addFilesNeeded(-directive.getFilesNeeded());
665
666    directivesById.remove(directive.getId());
667    pool.getDirectiveList().remove(directive);
668    assert directive.getPool() == null;
669
670    setNeedsRescan();
671  }
672
673  public void removeDirective(long id, FSPermissionChecker pc)
674      throws IOException {
675    assert namesystem.hasWriteLock();
676    try {
677      CacheDirective directive = getById(id);
678      checkWritePermission(pc, directive.getPool());
679      removeInternal(directive);
680    } catch (IOException e) {
681      LOG.warn("removeDirective of " + id + " failed: ", e);
682      throw e;
683    }
684    LOG.info("removeDirective of " + id + " successful.");
685  }
686
687  public BatchedListEntries<CacheDirectiveEntry> 
688        listCacheDirectives(long prevId,
689            CacheDirectiveInfo filter,
690            FSPermissionChecker pc) throws IOException {
691    assert namesystem.hasReadLock();
692    final int NUM_PRE_ALLOCATED_ENTRIES = 16;
693    String filterPath = null;
694    if (filter.getPath() != null) {
695      filterPath = validatePath(filter);
696    }
697    if (filter.getReplication() != null) {
698      throw new InvalidRequestException(
699          "Filtering by replication is unsupported.");
700    }
701
702    // Querying for a single ID
703    final Long id = filter.getId();
704    if (id != null) {
705      if (!directivesById.containsKey(id)) {
706        throw new InvalidRequestException("Did not find requested id " + id);
707      }
708      // Since we use a tailMap on directivesById, setting prev to id-1 gets
709      // us the directive with the id (if present)
710      prevId = id - 1;
711    }
712
713    ArrayList<CacheDirectiveEntry> replies =
714        new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
715    int numReplies = 0;
716    SortedMap<Long, CacheDirective> tailMap =
717      directivesById.tailMap(prevId + 1);
718    for (Entry<Long, CacheDirective> cur : tailMap.entrySet()) {
719      if (numReplies >= maxListCacheDirectivesNumResponses) {
720        return new BatchedListEntries<CacheDirectiveEntry>(replies, true);
721      }
722      CacheDirective curDirective = cur.getValue();
723      CacheDirectiveInfo info = cur.getValue().toInfo();
724
725      // If the requested ID is present, it should be the first item.
726      // Hitting this case means the ID is not present, or we're on the second
727      // item and should break out.
728      if (id != null &&
729          !(info.getId().equals(id))) {
730        break;
731      }
732      if (filter.getPool() != null && 
733          !info.getPool().equals(filter.getPool())) {
734        continue;
735      }
736      if (filterPath != null &&
737          !info.getPath().toUri().getPath().equals(filterPath)) {
738        continue;
739      }
740      boolean hasPermission = true;
741      if (pc != null) {
742        try {
743          pc.checkPermission(curDirective.getPool(), FsAction.READ);
744        } catch (AccessControlException e) {
745          hasPermission = false;
746        }
747      }
748      if (hasPermission) {
749        replies.add(new CacheDirectiveEntry(info, cur.getValue().toStats()));
750        numReplies++;
751      }
752    }
753    return new BatchedListEntries<CacheDirectiveEntry>(replies, false);
754  }
755
756  /**
757   * Create a cache pool.
758   * 
759   * Only the superuser should be able to call this function.
760   *
761   * @param info    The info for the cache pool to create.
762   * @return        Information about the cache pool we created.
763   */
764  public CachePoolInfo addCachePool(CachePoolInfo info)
765      throws IOException {
766    assert namesystem.hasWriteLock();
767    CachePool pool;
768    try {
769      CachePoolInfo.validate(info);
770      String poolName = info.getPoolName();
771      pool = cachePools.get(poolName);
772      if (pool != null) {
773        throw new InvalidRequestException("Cache pool " + poolName
774            + " already exists.");
775      }
776      pool = CachePool.createFromInfoAndDefaults(info);
777      cachePools.put(pool.getPoolName(), pool);
778    } catch (IOException e) {
779      LOG.info("addCachePool of " + info + " failed: ", e);
780      throw e;
781    }
782    LOG.info("addCachePool of " + info + " successful.");
783    return pool.getInfo(true);
784  }
785
786  /**
787   * Modify a cache pool.
788   * 
789   * Only the superuser should be able to call this function.
790   *
791   * @param info
792   *          The info for the cache pool to modify.
793   */
794  public void modifyCachePool(CachePoolInfo info)
795      throws IOException {
796    assert namesystem.hasWriteLock();
797    StringBuilder bld = new StringBuilder();
798    try {
799      CachePoolInfo.validate(info);
800      String poolName = info.getPoolName();
801      CachePool pool = cachePools.get(poolName);
802      if (pool == null) {
803        throw new InvalidRequestException("Cache pool " + poolName
804            + " does not exist.");
805      }
806      String prefix = "";
807      if (info.getOwnerName() != null) {
808        pool.setOwnerName(info.getOwnerName());
809        bld.append(prefix).
810          append("set owner to ").append(info.getOwnerName());
811        prefix = "; ";
812      }
813      if (info.getGroupName() != null) {
814        pool.setGroupName(info.getGroupName());
815        bld.append(prefix).
816          append("set group to ").append(info.getGroupName());
817        prefix = "; ";
818      }
819      if (info.getMode() != null) {
820        pool.setMode(info.getMode());
821        bld.append(prefix).append("set mode to " + info.getMode());
822        prefix = "; ";
823      }
824      if (info.getLimit() != null) {
825        pool.setLimit(info.getLimit());
826        bld.append(prefix).append("set limit to " + info.getLimit());
827        prefix = "; ";
828        // New limit changes stats, need to set needs refresh
829        setNeedsRescan();
830      }
831      if (info.getMaxRelativeExpiryMs() != null) {
832        final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
833        pool.setMaxRelativeExpiryMs(maxRelativeExpiry);
834        bld.append(prefix).append("set maxRelativeExpiry to "
835            + maxRelativeExpiry);
836        prefix = "; ";
837      }
838      if (prefix.isEmpty()) {
839        bld.append("no changes.");
840      }
841    } catch (IOException e) {
842      LOG.info("modifyCachePool of " + info + " failed: ", e);
843      throw e;
844    }
845    LOG.info("modifyCachePool of " + info.getPoolName() + " successful; "
846        + bld.toString());
847  }
848
849  /**
850   * Remove a cache pool.
851   * 
852   * Only the superuser should be able to call this function.
853   *
854   * @param poolName
855   *          The name for the cache pool to remove.
856   */
857  public void removeCachePool(String poolName)
858      throws IOException {
859    assert namesystem.hasWriteLock();
860    try {
861      CachePoolInfo.validateName(poolName);
862      CachePool pool = cachePools.remove(poolName);
863      if (pool == null) {
864        throw new InvalidRequestException(
865            "Cannot remove non-existent cache pool " + poolName);
866      }
867      // Remove all directives in this pool.
868      Iterator<CacheDirective> iter = pool.getDirectiveList().iterator();
869      while (iter.hasNext()) {
870        CacheDirective directive = iter.next();
871        directivesByPath.remove(directive.getPath());
872        directivesById.remove(directive.getId());
873        iter.remove();
874      }
875      setNeedsRescan();
876    } catch (IOException e) {
877      LOG.info("removeCachePool of " + poolName + " failed: ", e);
878      throw e;
879    }
880    LOG.info("removeCachePool of " + poolName + " successful.");
881  }
882
883  public BatchedListEntries<CachePoolEntry>
884      listCachePools(FSPermissionChecker pc, String prevKey) {
885    assert namesystem.hasReadLock();
886    final int NUM_PRE_ALLOCATED_ENTRIES = 16;
887    ArrayList<CachePoolEntry> results = 
888        new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
889    SortedMap<String, CachePool> tailMap = cachePools.tailMap(prevKey, false);
890    int numListed = 0;
891    for (Entry<String, CachePool> cur : tailMap.entrySet()) {
892      if (numListed++ >= maxListCachePoolsResponses) {
893        return new BatchedListEntries<CachePoolEntry>(results, true);
894      }
895      results.add(cur.getValue().getEntry(pc));
896    }
897    return new BatchedListEntries<CachePoolEntry>(results, false);
898  }
899
900  public void setCachedLocations(LocatedBlock block) {
901    CachedBlock cachedBlock =
902        new CachedBlock(block.getBlock().getBlockId(),
903            (short)0, false);
904    cachedBlock = cachedBlocks.get(cachedBlock);
905    if (cachedBlock == null) {
906      return;
907    }
908    List<DatanodeDescriptor> datanodes = cachedBlock.getDatanodes(Type.CACHED);
909    for (DatanodeDescriptor datanode : datanodes) {
910      block.addCachedLoc(datanode);
911    }
912  }
913
914  public final void processCacheReport(final DatanodeID datanodeID,
915      final List<Long> blockIds) throws IOException {
916    namesystem.writeLock();
917    final long startTime = Time.monotonicNow();
918    final long endTime;
919    try {
920      final DatanodeDescriptor datanode = 
921          blockManager.getDatanodeManager().getDatanode(datanodeID);
922      if (datanode == null || !datanode.isAlive) {
923        throw new IOException(
924            "processCacheReport from dead or unregistered datanode: " +
925            datanode);
926      }
927      processCacheReportImpl(datanode, blockIds);
928    } finally {
929      endTime = Time.monotonicNow();
930      namesystem.writeUnlock();
931    }
932
933    // Log the block report processing stats from Namenode perspective
934    final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
935    if (metrics != null) {
936      metrics.addCacheBlockReport((int) (endTime - startTime));
937    }
938    if (LOG.isDebugEnabled()) {
939      LOG.debug("Processed cache report from "
940          + datanodeID + ", blocks: " + blockIds.size()
941          + ", processing time: " + (endTime - startTime) + " msecs");
942    }
943  }
944
945  private void processCacheReportImpl(final DatanodeDescriptor datanode,
946      final List<Long> blockIds) {
947    CachedBlocksList cached = datanode.getCached();
948    cached.clear();
949    CachedBlocksList cachedList = datanode.getCached();
950    CachedBlocksList pendingCachedList = datanode.getPendingCached();
951    for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
952      long blockId = iter.next();
953      CachedBlock cachedBlock =
954          new CachedBlock(blockId, (short)0, false);
955      CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
956      // Add the block ID from the cache report to the cachedBlocks map
957      // if it's not already there.
958      if (prevCachedBlock != null) {
959        cachedBlock = prevCachedBlock;
960      } else {
961        cachedBlocks.put(cachedBlock);
962      }
963      // Add the block to the datanode's implicit cached block list
964      // if it's not already there.  Similarly, remove it from the pending
965      // cached block list if it exists there.
966      if (!cachedBlock.isPresent(cachedList)) {
967        cachedList.add(cachedBlock);
968      }
969      if (cachedBlock.isPresent(pendingCachedList)) {
970        pendingCachedList.remove(cachedBlock);
971      }
972    }
973  }
974
975  /**
976   * Saves the current state of the CacheManager to the DataOutput. Used
977   * to persist CacheManager state in the FSImage.
978   * @param out DataOutput to persist state
979   * @param sdPath path of the storage directory
980   * @throws IOException
981   */
  public void saveStateCompat(DataOutputStream out, String sdPath)
      throws IOException {
    // Pure delegation: serializerCompat.save does all of the work.
    serializerCompat.save(out, sdPath);
  }
986
987  public PersistState saveState() throws IOException {
988    ArrayList<CachePoolInfoProto> pools = Lists
989        .newArrayListWithCapacity(cachePools.size());
990    ArrayList<CacheDirectiveInfoProto> directives = Lists
991        .newArrayListWithCapacity(directivesById.size());
992
993    for (CachePool pool : cachePools.values()) {
994      CachePoolInfo p = pool.getInfo(true);
995      CachePoolInfoProto.Builder b = CachePoolInfoProto.newBuilder()
996          .setPoolName(p.getPoolName());
997
998      if (p.getOwnerName() != null)
999        b.setOwnerName(p.getOwnerName());
1000
1001      if (p.getGroupName() != null)
1002        b.setGroupName(p.getGroupName());
1003
1004      if (p.getMode() != null)
1005        b.setMode(p.getMode().toShort());
1006
1007      if (p.getLimit() != null)
1008        b.setLimit(p.getLimit());
1009
1010      pools.add(b.build());
1011    }
1012
1013    for (CacheDirective directive : directivesById.values()) {
1014      CacheDirectiveInfo info = directive.toInfo();
1015      CacheDirectiveInfoProto.Builder b = CacheDirectiveInfoProto.newBuilder()
1016          .setId(info.getId());
1017
1018      if (info.getPath() != null) {
1019        b.setPath(info.getPath().toUri().getPath());
1020      }
1021
1022      if (info.getReplication() != null) {
1023        b.setReplication(info.getReplication());
1024      }
1025
1026      if (info.getPool() != null) {
1027        b.setPool(info.getPool());
1028      }
1029
1030      Expiration expiry = info.getExpiration();
1031      if (expiry != null) {
1032        assert (!expiry.isRelative());
1033        b.setExpiration(PBHelper.convert(expiry));
1034      }
1035
1036      directives.add(b.build());
1037    }
1038    CacheManagerSection s = CacheManagerSection.newBuilder()
1039        .setNextDirectiveId(nextDirectiveId).setNumPools(pools.size())
1040        .setNumDirectives(directives.size()).build();
1041
1042    return new PersistState(s, pools, directives);
1043  }
1044
1045  /**
1046   * Reloads CacheManager state from the passed DataInput. Used during namenode
1047   * startup to restore CacheManager state from an FSImage.
1048   * @param in DataInput from which to restore state
1049   * @throws IOException
1050   */
  public void loadStateCompat(DataInput in) throws IOException {
    // Pure delegation: serializerCompat.load does all of the work.
    serializerCompat.load(in);
  }
1054
1055  public void loadState(PersistState s) throws IOException {
1056    nextDirectiveId = s.section.getNextDirectiveId();
1057    for (CachePoolInfoProto p : s.pools) {
1058      CachePoolInfo info = new CachePoolInfo(p.getPoolName());
1059      if (p.hasOwnerName())
1060        info.setOwnerName(p.getOwnerName());
1061
1062      if (p.hasGroupName())
1063        info.setGroupName(p.getGroupName());
1064
1065      if (p.hasMode())
1066        info.setMode(new FsPermission((short) p.getMode()));
1067
1068      if (p.hasLimit())
1069        info.setLimit(p.getLimit());
1070
1071      addCachePool(info);
1072    }
1073
1074    for (CacheDirectiveInfoProto p : s.directives) {
1075      // Get pool reference by looking it up in the map
1076      final String poolName = p.getPool();
1077      CacheDirective directive = new CacheDirective(p.getId(), new Path(
1078          p.getPath()).toUri().getPath(), (short) p.getReplication(), p
1079          .getExpiration().getMillis());
1080      addCacheDirective(poolName, directive);
1081    }
1082  }
1083
1084  private void addCacheDirective(final String poolName,
1085      final CacheDirective directive) throws IOException {
1086    CachePool pool = cachePools.get(poolName);
1087    if (pool == null) {
1088      throw new IOException("Directive refers to pool " + poolName
1089          + ", which does not exist.");
1090    }
1091    boolean addedDirective = pool.getDirectiveList().add(directive);
1092    assert addedDirective;
1093    if (directivesById.put(directive.getId(), directive) != null) {
1094      throw new IOException("A directive with ID " + directive.getId()
1095          + " already exists");
1096    }
1097    List<CacheDirective> directives = directivesByPath.get(directive.getPath());
1098    if (directives == null) {
1099      directives = new LinkedList<CacheDirective>();
1100      directivesByPath.put(directive.getPath(), directives);
1101    }
1102    directives.add(directive);
1103  }
1104
1105  private final class SerializerCompat {
1106    private void save(DataOutputStream out, String sdPath) throws IOException {
1107      out.writeLong(nextDirectiveId);
1108      savePools(out, sdPath);
1109      saveDirectives(out, sdPath);
1110    }
1111
1112    private void load(DataInput in) throws IOException {
1113      nextDirectiveId = in.readLong();
1114      // pools need to be loaded first since directives point to their parent pool
1115      loadPools(in);
1116      loadDirectives(in);
1117    }
1118
1119    /**
1120     * Save cache pools to fsimage
1121     */
1122    private void savePools(DataOutputStream out,
1123        String sdPath) throws IOException {
1124      StartupProgress prog = NameNode.getStartupProgress();
1125      Step step = new Step(StepType.CACHE_POOLS, sdPath);
1126      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
1127      prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size());
1128      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
1129      out.writeInt(cachePools.size());
1130      for (CachePool pool: cachePools.values()) {
1131        FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true));
1132        counter.increment();
1133      }
1134      prog.endStep(Phase.SAVING_CHECKPOINT, step);
1135    }
1136
1137    /*
1138     * Save cache entries to fsimage
1139     */
1140    private void saveDirectives(DataOutputStream out, String sdPath)
1141        throws IOException {
1142      StartupProgress prog = NameNode.getStartupProgress();
1143      Step step = new Step(StepType.CACHE_ENTRIES, sdPath);
1144      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
1145      prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size());
1146      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
1147      out.writeInt(directivesById.size());
1148      for (CacheDirective directive : directivesById.values()) {
1149        FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo());
1150        counter.increment();
1151      }
1152      prog.endStep(Phase.SAVING_CHECKPOINT, step);
1153    }
1154
1155    /**
1156     * Load cache pools from fsimage
1157     */
1158    private void loadPools(DataInput in)
1159        throws IOException {
1160      StartupProgress prog = NameNode.getStartupProgress();
1161      Step step = new Step(StepType.CACHE_POOLS);
1162      prog.beginStep(Phase.LOADING_FSIMAGE, step);
1163      int numberOfPools = in.readInt();
1164      prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfPools);
1165      Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1166      for (int i = 0; i < numberOfPools; i++) {
1167        addCachePool(FSImageSerialization.readCachePoolInfo(in));
1168        counter.increment();
1169      }
1170      prog.endStep(Phase.LOADING_FSIMAGE, step);
1171    }
1172
1173    /**
1174     * Load cache directives from the fsimage
1175     */
1176    private void loadDirectives(DataInput in) throws IOException {
1177      StartupProgress prog = NameNode.getStartupProgress();
1178      Step step = new Step(StepType.CACHE_ENTRIES);
1179      prog.beginStep(Phase.LOADING_FSIMAGE, step);
1180      int numDirectives = in.readInt();
1181      prog.setTotal(Phase.LOADING_FSIMAGE, step, numDirectives);
1182      Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1183      for (int i = 0; i < numDirectives; i++) {
1184        CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in);
1185        // Get pool reference by looking it up in the map
1186        final String poolName = info.getPool();
1187        CacheDirective directive =
1188            new CacheDirective(info.getId(), info.getPath().toUri().getPath(),
1189                info.getReplication(), info.getExpiration().getAbsoluteMillis());
1190        addCacheDirective(poolName, directive);
1191        counter.increment();
1192      }
1193      prog.endStep(Phase.LOADING_FSIMAGE, step);
1194    }
1195  }
1196
1197  public void waitForRescanIfNeeded() {
1198    crmLock.lock();
1199    try {
1200      if (monitor != null) {
1201        monitor.waitForRescanIfNeeded();
1202      }
1203    } finally {
1204      crmLock.unlock();
1205    }
1206  }
1207
1208  private void setNeedsRescan() {
1209    crmLock.lock();
1210    try {
1211      if (monitor != null) {
1212        monitor.setNeedsRescan();
1213      }
1214    } finally {
1215      crmLock.unlock();
1216    }
1217  }
1218
1219  @VisibleForTesting
1220  public Thread getCacheReplicationMonitor() {
1221    crmLock.lock();
1222    try {
1223      return monitor;
1224    } finally {
1225      crmLock.unlock();
1226    }
1227  }
1228}