001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs;
019
020import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
021import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
022import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
023import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
024import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_DEFAULT;
025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY;
026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_RETRIES_DEFAULT;
027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_RETRIES_KEY;
028import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHED_CONN_RETRY_DEFAULT;
029import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHED_CONN_RETRY_KEY;
030import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_READS;
031import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_WRITES;
032import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHE_READAHEAD;
033import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_DATANODE_RESTART_TIMEOUT_KEY;
034import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_DATANODE_RESTART_TIMEOUT_DEFAULT;
035import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT;
036import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY;
037import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT;
038import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY;
039import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT;
040import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY;
041import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT;
042import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY;
043import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT;
044import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY;
045import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY;
046import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE;
047import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT;
048import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY;
049import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT;
050import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY;
051import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
052import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_DN_HOSTNAME;
053import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT;
054import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL;
055import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT;
056import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
057import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
058import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
059import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
060import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
061import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT;
062import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT_DEFAULT;
063
064import java.io.BufferedOutputStream;
065import java.io.DataInputStream;
066import java.io.DataOutputStream;
067import java.io.FileNotFoundException;
068import java.io.IOException;
069import java.io.InputStream;
070import java.io.OutputStream;
071import java.net.InetAddress;
072import java.net.InetSocketAddress;
073import java.net.NetworkInterface;
074import java.net.Socket;
075import java.net.SocketException;
076import java.net.SocketAddress;
077import java.net.URI;
078import java.net.UnknownHostException;
079import java.util.ArrayList;
080import java.util.Collections;
081import java.util.EnumSet;
082import java.util.HashMap;
083import java.util.LinkedHashMap;
084import java.util.List;
085import java.util.Map;
086import java.util.Random;
087import java.util.concurrent.SynchronousQueue;
088import java.util.concurrent.ThreadPoolExecutor;
089import java.util.concurrent.TimeUnit;
090import java.util.concurrent.atomic.AtomicInteger;
091
092import javax.net.SocketFactory;
093
094import com.google.common.collect.Lists;
095import org.apache.commons.logging.Log;
096import org.apache.commons.logging.LogFactory;
097import org.apache.hadoop.classification.InterfaceAudience;
098import org.apache.hadoop.conf.Configuration;
099import org.apache.hadoop.fs.BlockLocation;
100import org.apache.hadoop.fs.BlockStorageLocation;
101import org.apache.hadoop.fs.CacheFlag;
102import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
103import org.apache.hadoop.fs.ContentSummary;
104import org.apache.hadoop.fs.CreateFlag;
105import org.apache.hadoop.fs.FileAlreadyExistsException;
106import org.apache.hadoop.fs.FileSystem;
107import org.apache.hadoop.fs.FsServerDefaults;
108import org.apache.hadoop.fs.FsStatus;
109import org.apache.hadoop.fs.HdfsBlockLocation;
110import org.apache.hadoop.fs.InvalidPathException;
111import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
112import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
113import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
114import org.apache.hadoop.fs.Options;
115import org.apache.hadoop.fs.RemoteIterator;
116import org.apache.hadoop.fs.XAttr;
117import org.apache.hadoop.fs.XAttrSetFlag;
118import org.apache.hadoop.fs.Options.ChecksumOpt;
119import org.apache.hadoop.fs.ParentNotDirectoryException;
120import org.apache.hadoop.fs.Path;
121import org.apache.hadoop.fs.UnresolvedLinkException;
122import org.apache.hadoop.fs.VolumeId;
123import org.apache.hadoop.fs.permission.AclEntry;
124import org.apache.hadoop.fs.permission.AclStatus;
125import org.apache.hadoop.fs.permission.FsPermission;
126import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
127import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
128import org.apache.hadoop.hdfs.protocol.AclException;
129import org.apache.hadoop.hdfs.net.Peer;
130import org.apache.hadoop.hdfs.net.TcpPeerServer;
131import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
132import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
133import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
134import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
135import org.apache.hadoop.hdfs.protocol.CachePoolIterator;
136import org.apache.hadoop.hdfs.protocol.ClientProtocol;
137import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
138import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
139import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
140import org.apache.hadoop.hdfs.protocol.DirectoryListing;
141import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
142import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
143import org.apache.hadoop.hdfs.protocol.HdfsConstants;
144import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
145import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
146import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
147import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
148import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
149import org.apache.hadoop.hdfs.protocol.LocatedBlock;
150import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
151import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException;
152import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
153import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
154import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
155import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
156import org.apache.hadoop.hdfs.protocol.UnresolvedPathException;
157import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor;
158import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
159import org.apache.hadoop.hdfs.protocol.datatransfer.Op;
160import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
161import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver;
162import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
163import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
164import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumResponseProto;
165import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
166import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
167import org.apache.hadoop.hdfs.protocolPB.PBHelper;
168import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
169import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
170import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
171import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
172import org.apache.hadoop.hdfs.server.namenode.NameNode;
173import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
174import org.apache.hadoop.io.DataOutputBuffer;
175import org.apache.hadoop.io.EnumSetWritable;
176import org.apache.hadoop.io.IOUtils;
177import org.apache.hadoop.io.MD5Hash;
178import org.apache.hadoop.io.Text;
179import org.apache.hadoop.io.retry.LossyRetryInvocationHandler;
180import org.apache.hadoop.ipc.Client;
181import org.apache.hadoop.ipc.RPC;
182import org.apache.hadoop.ipc.RemoteException;
183import org.apache.hadoop.net.DNS;
184import org.apache.hadoop.net.NetUtils;
185import org.apache.hadoop.security.AccessControlException;
186import org.apache.hadoop.security.UserGroupInformation;
187import org.apache.hadoop.security.token.SecretManager.InvalidToken;
188import org.apache.hadoop.security.token.Token;
189import org.apache.hadoop.security.token.TokenRenewer;
190import org.apache.hadoop.util.Daemon;
191import org.apache.hadoop.util.DataChecksum;
192import org.apache.hadoop.util.DataChecksum.Type;
193import org.apache.hadoop.util.Progressable;
194import org.apache.hadoop.util.Time;
195
196import com.google.common.annotations.VisibleForTesting;
197import com.google.common.base.Joiner;
198import com.google.common.base.Preconditions;
199import com.google.common.net.InetAddresses;
200
201/********************************************************
202 * DFSClient can connect to a Hadoop Filesystem and 
203 * perform basic file tasks.  It uses the ClientProtocol
204 * to communicate with a NameNode daemon, and connects 
205 * directly to DataNodes to read/write block data.
206 *
207 * Hadoop DFS users should obtain an instance of 
208 * DistributedFileSystem, which uses DFSClient to handle
209 * filesystem tasks.
210 *
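 * A minimal usage sketch (illustrative only; assumes fs.defaultFS points at
 * an HDFS cluster and the path shown is hypothetical):
 * <pre>
 *   Configuration conf = new Configuration();
 *   FileSystem fs = FileSystem.get(conf);  // typically a DistributedFileSystem
 *   try (FSDataInputStream in = fs.open(new Path("/user/example/data.txt"))) {
 *     IOUtils.copyBytes(in, System.out, 4096, false);
 *   }
 * </pre>
 *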
211 ********************************************************/
212@InterfaceAudience.Private
213public class DFSClient implements java.io.Closeable, RemotePeerFactory {
214  public static final Log LOG = LogFactory.getLog(DFSClient.class);
215  public static final long SERVER_DEFAULTS_VALIDITY_PERIOD = 60 * 60 * 1000L; // 1 hour
216  static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
217
218  private final Configuration conf;
219  private final Conf dfsClientConf;
220  final ClientProtocol namenode;
221  /* The service used for delegation tokens */
222  private Text dtService;
223
224  final UserGroupInformation ugi;
225  volatile boolean clientRunning = true;
226  volatile long lastLeaseRenewal;
227  private volatile FsServerDefaults serverDefaults;
228  private volatile long serverDefaultsLastUpdate;
229  final String clientName;
230  final SocketFactory socketFactory;
231  final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
232  final FileSystem.Statistics stats;
233  private final String authority;
234  private final Random r = new Random();
235  private SocketAddress[] localInterfaceAddrs;
236  private DataEncryptionKey encryptionKey;
237  final TrustedChannelResolver trustedChannelResolver;
238  private final CachingStrategy defaultReadCachingStrategy;
239  private final CachingStrategy defaultWriteCachingStrategy;
240  private final ClientContext clientContext;
241  private volatile long hedgedReadThresholdMillis;
242  private static final DFSHedgedReadMetrics HEDGED_READ_METRIC =
243      new DFSHedgedReadMetrics();
244  private static ThreadPoolExecutor HEDGED_READ_THREAD_POOL;
245  
246  /**
247   * DFSClient configuration 
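   *
   * Values are read once from a {@link Configuration} when the client is
   * constructed. A sketch of overriding one of them first (the timeout value
   * is illustrative):
   * <pre>
   *   Configuration conf = new HdfsConfiguration();
   *   conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, 120 * 1000);
   *   DFSClient.Conf clientConf = new DFSClient.Conf(conf);
   * </pre>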
248   */
249  public static class Conf {
250    final int hdfsTimeout;    // timeout value for a DFS operation.
251
252    final int maxFailoverAttempts;
253    final int maxRetryAttempts;
254    final int failoverSleepBaseMillis;
255    final int failoverSleepMaxMillis;
256    final int maxBlockAcquireFailures;
257    final int confTime;
258    final int ioBufferSize;
259    final ChecksumOpt defaultChecksumOpt;
260    final int writePacketSize;
261    final int socketTimeout;
262    final int socketCacheCapacity;
263    final long socketCacheExpiry;
264    final long excludedNodesCacheExpiry;
265    /** Wait time window (in msec) if BlockMissingException is caught */
266    final int timeWindow;
267    final int nCachedConnRetry;
268    final int nBlockWriteRetry;
269    final int nBlockWriteLocateFollowingRetry;
270    final long defaultBlockSize;
271    final long prefetchSize;
272    final short defaultReplication;
273    final String taskId;
274    final FsPermission uMask;
275    final boolean connectToDnViaHostname;
276    final boolean getHdfsBlocksMetadataEnabled;
277    final int getFileBlockStorageLocationsNumThreads;
278    final int getFileBlockStorageLocationsTimeoutMs;
279    final int retryTimesForGetLastBlockLength;
280    final int retryIntervalForGetLastBlockLength;
281    final long datanodeRestartTimeout;
282    final long dfsclientSlowIoWarningThresholdMs;
283
284    final boolean useLegacyBlockReader;
285    final boolean useLegacyBlockReaderLocal;
286    final String domainSocketPath;
287    final boolean skipShortCircuitChecksums;
288    final int shortCircuitBufferSize;
289    final boolean shortCircuitLocalReads;
290    final boolean domainSocketDataTraffic;
291    final int shortCircuitStreamsCacheSize;
292    final long shortCircuitStreamsCacheExpiryMs; 
293    final int shortCircuitSharedMemoryWatcherInterruptCheckMs;
294    
295    final boolean shortCircuitMmapEnabled;
296    final int shortCircuitMmapCacheSize;
297    final long shortCircuitMmapCacheExpiryMs;
298    final long shortCircuitMmapCacheRetryTimeout;
299    final long shortCircuitCacheStaleThresholdMs;
300
301    public Conf(Configuration conf) {
302      // The hdfsTimeout is currently the same as the ipc timeout 
303      hdfsTimeout = Client.getTimeout(conf);
304      maxFailoverAttempts = conf.getInt(
305          DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
306          DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
307      maxRetryAttempts = conf.getInt(
308          DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY,
309          DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
310      failoverSleepBaseMillis = conf.getInt(
311          DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
312          DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
313      failoverSleepMaxMillis = conf.getInt(
314          DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
315          DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT);
316
317      maxBlockAcquireFailures = conf.getInt(
318          DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY,
319          DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT);
320      confTime = conf.getInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY,
321          HdfsServerConstants.WRITE_TIMEOUT);
322      ioBufferSize = conf.getInt(
323          CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY,
324          CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT);
325      defaultChecksumOpt = getChecksumOptFromConf(conf);
326      socketTimeout = conf.getInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY,
327          HdfsServerConstants.READ_TIMEOUT);
328      // dfs.write.packet.size is an internal config variable
329      writePacketSize = conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY,
330          DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT);
331      defaultBlockSize = conf.getLongBytes(DFS_BLOCK_SIZE_KEY,
332          DFS_BLOCK_SIZE_DEFAULT);
333      defaultReplication = (short) conf.getInt(
334          DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT);
335      taskId = conf.get("mapreduce.task.attempt.id", "NONMAPREDUCE");
336      socketCacheCapacity = conf.getInt(DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY,
337          DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT);
338      socketCacheExpiry = conf.getLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
339          DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT);
340      excludedNodesCacheExpiry = conf.getLong(
341          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
342          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT);
343      prefetchSize = conf.getLong(DFS_CLIENT_READ_PREFETCH_SIZE_KEY,
344          10 * defaultBlockSize);
345      timeWindow = conf.getInt(DFS_CLIENT_RETRY_WINDOW_BASE, 3000);
346      nCachedConnRetry = conf.getInt(DFS_CLIENT_CACHED_CONN_RETRY_KEY,
347          DFS_CLIENT_CACHED_CONN_RETRY_DEFAULT);
348      nBlockWriteRetry = conf.getInt(DFS_CLIENT_BLOCK_WRITE_RETRIES_KEY,
349          DFS_CLIENT_BLOCK_WRITE_RETRIES_DEFAULT);
350      nBlockWriteLocateFollowingRetry = conf.getInt(
351          DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY,
352          DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_DEFAULT);
353      uMask = FsPermission.getUMask(conf);
354      connectToDnViaHostname = conf.getBoolean(DFS_CLIENT_USE_DN_HOSTNAME,
355          DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT);
356      getHdfsBlocksMetadataEnabled = conf.getBoolean(
357          DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, 
358          DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
359      getFileBlockStorageLocationsNumThreads = conf.getInt(
360          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_NUM_THREADS,
361          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_NUM_THREADS_DEFAULT);
362      getFileBlockStorageLocationsTimeoutMs = conf.getInt(
363          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_MS,
364          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_MS_DEFAULT);
365      retryTimesForGetLastBlockLength = conf.getInt(
366          DFSConfigKeys.DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH,
367          DFSConfigKeys.DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH_DEFAULT);
368      retryIntervalForGetLastBlockLength = conf.getInt(
369        DFSConfigKeys.DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH,
370        DFSConfigKeys.DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH_DEFAULT);
371
372      useLegacyBlockReader = conf.getBoolean(
373          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER,
374          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT);
375      useLegacyBlockReaderLocal = conf.getBoolean(
376          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
377          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT);
378      shortCircuitLocalReads = conf.getBoolean(
379          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
380          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT);
381      domainSocketDataTraffic = conf.getBoolean(
382          DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
383          DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT);
384      domainSocketPath = conf.getTrimmed(
385          DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
386          DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT);
387
388      if (BlockReaderLocal.LOG.isDebugEnabled()) {
389        BlockReaderLocal.LOG.debug(
390            DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL
391            + " = " + useLegacyBlockReaderLocal);
392        BlockReaderLocal.LOG.debug(
393            DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY
394            + " = " + shortCircuitLocalReads);
395        BlockReaderLocal.LOG.debug(
396            DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC
397            + " = " + domainSocketDataTraffic);
398        BlockReaderLocal.LOG.debug(
399            DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY
400            + " = " + domainSocketPath);
401      }
402
403      skipShortCircuitChecksums = conf.getBoolean(
404          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
405          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
406      shortCircuitBufferSize = conf.getInt(
407          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
408          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
409      shortCircuitStreamsCacheSize = conf.getInt(
410          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY,
411          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT);
412      shortCircuitStreamsCacheExpiryMs = conf.getLong(
413          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
414          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT);
415      shortCircuitMmapEnabled = conf.getBoolean(
416          DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED,
417          DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED_DEFAULT);
418      shortCircuitMmapCacheSize = conf.getInt(
419          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE,
420          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);
421      shortCircuitMmapCacheExpiryMs = conf.getLong(
422          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
423          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT);
424      shortCircuitMmapCacheRetryTimeout = conf.getLong(
425          DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS,
426          DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS_DEFAULT);
427      shortCircuitCacheStaleThresholdMs = conf.getLong(
428          DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS,
429          DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS_DEFAULT);
430      shortCircuitSharedMemoryWatcherInterruptCheckMs = conf.getInt(
431          DFSConfigKeys.DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS,
432          DFSConfigKeys.DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS_DEFAULT);
433
434      datanodeRestartTimeout = conf.getLong(
435          DFS_CLIENT_DATANODE_RESTART_TIMEOUT_KEY,
436          DFS_CLIENT_DATANODE_RESTART_TIMEOUT_DEFAULT) * 1000;
437      dfsclientSlowIoWarningThresholdMs = conf.getLong(
438          DFSConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY,
439          DFSConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_DEFAULT);
440    }
441
442    public boolean isUseLegacyBlockReaderLocal() {
443      return useLegacyBlockReaderLocal;
444    }
445
446    public String getDomainSocketPath() {
447      return domainSocketPath;
448    }
449
450    public boolean isShortCircuitLocalReads() {
451      return shortCircuitLocalReads;
452    }
453
454    public boolean isDomainSocketDataTraffic() {
455      return domainSocketDataTraffic;
456    }
457
458    private DataChecksum.Type getChecksumType(Configuration conf) {
459      final String checksum = conf.get(
460          DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY,
461          DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT);
462      try {
463        return DataChecksum.Type.valueOf(checksum);
464      } catch(IllegalArgumentException iae) {
465        LOG.warn("Bad checksum type: " + checksum + ". Using default "
466            + DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT);
467        return DataChecksum.Type.valueOf(
468            DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT); 
469      }
470    }
471
472    // Construct a checksum option from conf
473    private ChecksumOpt getChecksumOptFromConf(Configuration conf) {
474      DataChecksum.Type type = getChecksumType(conf);
475      int bytesPerChecksum = conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY,
476          DFS_BYTES_PER_CHECKSUM_DEFAULT);
477      return new ChecksumOpt(type, bytesPerChecksum);
478    }
479
480    // create a DataChecksum with the default option.
481    private DataChecksum createChecksum() throws IOException {
482      return createChecksum(null);
483    }
484
485    private DataChecksum createChecksum(ChecksumOpt userOpt) 
486        throws IOException {
487      // Fill in any missing field with the default.
488      ChecksumOpt myOpt = ChecksumOpt.processChecksumOpt(
489          defaultChecksumOpt, userOpt);
490      DataChecksum dataChecksum = DataChecksum.newDataChecksum(
491          myOpt.getChecksumType(),
492          myOpt.getBytesPerChecksum());
493      if (dataChecksum == null) {
494        throw new IOException("Invalid checksum type specified: "
495            + myOpt.getChecksumType().name());
496      }
497      return dataChecksum;
498    }
499  }
500 
501  public Conf getConf() {
502    return dfsClientConf;
503  }
504
505  Configuration getConfiguration() {
506    return conf;
507  }
508
509  /**
510   * A map from inode IDs to {@link DFSOutputStream} objects
511   * that are currently being written by this client.
512   * Note that a file can only be written by a single client.
513   */
514  private final Map<Long, DFSOutputStream> filesBeingWritten
515      = new HashMap<Long, DFSOutputStream>();
516
517  /**
518   * Same as this(NameNode.getAddress(conf), conf);
519   * @see #DFSClient(InetSocketAddress, Configuration)
520   * @deprecated Deprecated at 0.21
521   */
522  @Deprecated
523  public DFSClient(Configuration conf) throws IOException {
524    this(NameNode.getAddress(conf), conf);
525  }
526  
527  public DFSClient(InetSocketAddress address, Configuration conf) throws IOException {
528    this(NameNode.getUri(address), conf);
529  }
530
531  /**
532   * Same as this(nameNodeUri, conf, null);
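   *
   * Illustrative construction (the URI is hypothetical):
   * <pre>
   *   DFSClient client = new DFSClient(URI.create("hdfs://nn.example.com:8020"), conf);
   * </pre>
   *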
533   * @see #DFSClient(URI, Configuration, FileSystem.Statistics)
534   */
535  public DFSClient(URI nameNodeUri, Configuration conf
536      ) throws IOException {
537    this(nameNodeUri, conf, null);
538  }
539
540  /**
541   * Same as this(nameNodeUri, null, conf, stats);
542   * @see #DFSClient(URI, ClientProtocol, Configuration, FileSystem.Statistics) 
543   */
544  public DFSClient(URI nameNodeUri, Configuration conf,
545                   FileSystem.Statistics stats)
546    throws IOException {
547    this(nameNodeUri, null, conf, stats);
548  }
549  
550  /** 
551   * Create a new DFSClient connected to the given nameNodeUri or rpcNamenode.
552   * If HA is enabled and a positive value is set for 
553   * {@link DFSConfigKeys#DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY} in the
554   * configuration, the DFSClient will use {@link LossyRetryInvocationHandler}
555   * as its RetryInvocationHandler. Otherwise one of nameNodeUri or rpcNamenode 
556   * must be null.
557   */
558  @VisibleForTesting
559  public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
560      Configuration conf, FileSystem.Statistics stats)
561    throws IOException {
562    // Copy only the required DFSClient configuration
563    this.dfsClientConf = new Conf(conf);
564    if (this.dfsClientConf.useLegacyBlockReaderLocal) {
565      LOG.debug("Using legacy short-circuit local reads.");
566    }
567    this.conf = conf;
568    this.stats = stats;
569    this.socketFactory = NetUtils.getSocketFactory(conf, ClientProtocol.class);
570    this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);
571
572    this.ugi = UserGroupInformation.getCurrentUser();
573    
574    this.authority = nameNodeUri == null? "null": nameNodeUri.getAuthority();
575    this.clientName = "DFSClient_" + dfsClientConf.taskId + "_" + 
576        DFSUtil.getRandom().nextInt()  + "_" + Thread.currentThread().getId();
577    
578    int numResponseToDrop = conf.getInt(
579        DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY,
580        DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT);
581    NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo = null;
582    if (numResponseToDrop > 0) {
583      // This case is used for testing.
584      LOG.warn(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY
585          + " is set to " + numResponseToDrop
586          + ", this hacked client will proactively drop responses");
587      proxyInfo = NameNodeProxies.createProxyWithLossyRetryHandler(conf,
588          nameNodeUri, ClientProtocol.class, numResponseToDrop);
589    }
590    
591    if (proxyInfo != null) {
592      this.dtService = proxyInfo.getDelegationTokenService();
593      this.namenode = proxyInfo.getProxy();
594    } else if (rpcNamenode != null) {
595      // This case is used for testing.
596      Preconditions.checkArgument(nameNodeUri == null);
597      this.namenode = rpcNamenode;
598      dtService = null;
599    } else {
600      Preconditions.checkArgument(nameNodeUri != null,
601          "null URI");
602      proxyInfo = NameNodeProxies.createProxy(conf, nameNodeUri,
603          ClientProtocol.class);
604      this.dtService = proxyInfo.getDelegationTokenService();
605      this.namenode = proxyInfo.getProxy();
606    }
607
608    String localInterfaces[] =
609      conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES);
610    localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces);
611    if (LOG.isDebugEnabled() && 0 != localInterfaces.length) {
612      LOG.debug("Using local interfaces [" +
613      Joiner.on(',').join(localInterfaces)+ "] with addresses [" +
614      Joiner.on(',').join(localInterfaceAddrs) + "]");
615    }
616    
617    Boolean readDropBehind = (conf.get(DFS_CLIENT_CACHE_DROP_BEHIND_READS) == null) ?
618        null : conf.getBoolean(DFS_CLIENT_CACHE_DROP_BEHIND_READS, false);
619    Long readahead = (conf.get(DFS_CLIENT_CACHE_READAHEAD) == null) ?
620        null : conf.getLong(DFS_CLIENT_CACHE_READAHEAD, 0);
621    Boolean writeDropBehind = (conf.get(DFS_CLIENT_CACHE_DROP_BEHIND_WRITES) == null) ?
622        null : conf.getBoolean(DFS_CLIENT_CACHE_DROP_BEHIND_WRITES, false);
623    this.defaultReadCachingStrategy =
624        new CachingStrategy(readDropBehind, readahead);
625    this.defaultWriteCachingStrategy =
626        new CachingStrategy(writeDropBehind, readahead);
627    this.clientContext = ClientContext.get(
628        conf.get(DFS_CLIENT_CONTEXT, DFS_CLIENT_CONTEXT_DEFAULT),
629        dfsClientConf);
630    this.hedgedReadThresholdMillis = conf.getLong(
631        DFSConfigKeys.DFS_DFSCLIENT_HEDGED_READ_THRESHOLD_MILLIS,
632        DFSConfigKeys.DEFAULT_DFSCLIENT_HEDGED_READ_THRESHOLD_MILLIS);
633    int numThreads = conf.getInt(
634        DFSConfigKeys.DFS_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE,
635        DFSConfigKeys.DEFAULT_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE);
636    if (numThreads > 0) {
637      this.initThreadsNumForHedgedReads(numThreads);
638    }
639    this.trustedChannelResolver = TrustedChannelResolver.getInstance(getConfiguration());
640  }
641  
642  /**
643   * Return the socket addresses to use with each configured
644   * local interface. Local interfaces may be specified by IP
645   * address, IP address range using CIDR notation, interface
646   * name (e.g. eth0) or sub-interface name (e.g. eth0:0).
647   * The socket addresses consist of the IPs for the interfaces
648   * and the ephemeral port (port 0). If an IP, IP range, or
649   * interface name matches an interface with sub-interfaces
650   * only the IP of the interface is used. Sub-interfaces can
651   * be used by specifying them explicitly (by IP or name).
652   * 
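   * A hedged configuration example (the interface name and CIDR range are
   * illustrative):
   * <pre>
   *   conf.set(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES, "eth0,10.0.0.0/8");
   * </pre>
   *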
653   * @return SocketAddresses for the configured local interfaces,
654   *    or an empty array if none are configured
655   * @throws UnknownHostException if a given interface name is invalid
656   */
657  private static SocketAddress[] getLocalInterfaceAddrs(
658      String interfaceNames[]) throws UnknownHostException {
659    List<SocketAddress> localAddrs = new ArrayList<SocketAddress>();
660    for (String interfaceName : interfaceNames) {
661      if (InetAddresses.isInetAddress(interfaceName)) {
662        localAddrs.add(new InetSocketAddress(interfaceName, 0));
663      } else if (NetUtils.isValidSubnet(interfaceName)) {
664        for (InetAddress addr : NetUtils.getIPs(interfaceName, false)) {
665          localAddrs.add(new InetSocketAddress(addr, 0));
666        }
667      } else {
668        for (String ip : DNS.getIPs(interfaceName, false)) {
669          localAddrs.add(new InetSocketAddress(ip, 0));
670        }
671      }
672    }
673    return localAddrs.toArray(new SocketAddress[localAddrs.size()]);
674  }
675
676  /**
677   * Select one of the configured local interfaces at random. We use a random
678   * interface because other policies like round-robin are less effective
679   * given that we cache connections to datanodes.
680   *
681   * @return one of the local interface addresses at random, or null if no
682   *    local interfaces are configured
683   */
684  SocketAddress getRandomLocalInterfaceAddr() {
685    if (localInterfaceAddrs.length == 0) {
686      return null;
687    }
688    final int idx = r.nextInt(localInterfaceAddrs.length);
689    final SocketAddress addr = localInterfaceAddrs[idx];
690    if (LOG.isDebugEnabled()) {
691      LOG.debug("Using local interface " + addr);
692    }
693    return addr;
694  }
695
696  /**
697   * Return the number of times the client should go back to the namenode
698   * to retrieve block locations when reading.
699   */
700  int getMaxBlockAcquireFailures() {
701    return dfsClientConf.maxBlockAcquireFailures;
702  }
703
704  /**
705   * Return the timeout that clients should use when writing to datanodes.
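   *
   * For example, with the usual defaults (an 8 minute base write timeout plus
   * a 5 second extension per datanode), a 3-node pipeline would get roughly
   * 480,000 + 3 * 5,000 = 495,000 ms; the values here are illustrative.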
706   * @param numNodes the number of nodes in the pipeline.
707   */
708  int getDatanodeWriteTimeout(int numNodes) {
709    return (dfsClientConf.confTime > 0) ?
710      (dfsClientConf.confTime + HdfsServerConstants.WRITE_TIMEOUT_EXTENSION * numNodes) : 0;
711  }
712
713  int getDatanodeReadTimeout(int numNodes) {
714    return dfsClientConf.socketTimeout > 0 ?
715        (HdfsServerConstants.READ_TIMEOUT_EXTENSION * numNodes +
716            dfsClientConf.socketTimeout) : 0;
717  }
718  
719  int getHdfsTimeout() {
720    return dfsClientConf.hdfsTimeout;
721  }
722  
723  @VisibleForTesting
724  public String getClientName() {
725    return clientName;
726  }
727
728  void checkOpen() throws IOException {
729    if (!clientRunning) {
730      IOException result = new IOException("Filesystem closed");
731      throw result;
732    }
733  }
734
735  /** Return the lease renewer instance. The renewer thread won't start
736   *  until the first output stream is created. The same instance will
737   *  be returned until all output streams are closed.
738   */
739  public LeaseRenewer getLeaseRenewer() throws IOException {
740    return LeaseRenewer.getInstance(authority, ugi, this);
741  }
742
743  /** Get a lease and start automatic renewal */
744  private void beginFileLease(final long inodeId, final DFSOutputStream out)
745      throws IOException {
746    getLeaseRenewer().put(inodeId, out, this);
747  }
748
749  /** Stop renewal of lease for the file. */
750  void endFileLease(final long inodeId) throws IOException {
751    getLeaseRenewer().closeFile(inodeId, this);
752  }
753    
754
755  /** Put a file. Only called from LeaseRenewer, where proper locking is
756   *  enforced to consistently update its local dfsclients array and 
757   *  client's filesBeingWritten map.
758   */
759  void putFileBeingWritten(final long inodeId, final DFSOutputStream out) {
760    synchronized(filesBeingWritten) {
761      filesBeingWritten.put(inodeId, out);
762      // update the last lease renewal time only when there were no
763      // writes. Once there is one write stream open, the lease renewer
764      // thread keeps it updated well within anyone's expiration time.
765      if (lastLeaseRenewal == 0) {
766        updateLastLeaseRenewal();
767      }
768    }
769  }
770
771  /** Remove a file. Only called from LeaseRenewer. */
772  void removeFileBeingWritten(final long inodeId) {
773    synchronized(filesBeingWritten) {
774      filesBeingWritten.remove(inodeId);
775      if (filesBeingWritten.isEmpty()) {
776        lastLeaseRenewal = 0;
777      }
778    }
779  }
780
781  /** Is file-being-written map empty? */
782  boolean isFilesBeingWrittenEmpty() {
783    synchronized(filesBeingWritten) {
784      return filesBeingWritten.isEmpty();
785    }
786  }
787  
788  /** @return true if the client is running */
789  boolean isClientRunning() {
790    return clientRunning;
791  }
792
793  long getLastLeaseRenewal() {
794    return lastLeaseRenewal;
795  }
796
797  void updateLastLeaseRenewal() {
798    synchronized(filesBeingWritten) {
799      if (filesBeingWritten.isEmpty()) {
800        return;
801      }
802      lastLeaseRenewal = Time.now();
803    }
804  }
805
806  /**
807   * Renew leases.
808   * @return true if lease was renewed. May return false if this
809   * client has been closed or has no files open.
810   **/
811  boolean renewLease() throws IOException {
812    if (clientRunning && !isFilesBeingWrittenEmpty()) {
813      try {
814        namenode.renewLease(clientName);
815        updateLastLeaseRenewal();
816        return true;
817      } catch (IOException e) {
818        // Abort if the lease has already expired. 
819        final long elapsed = Time.now() - getLastLeaseRenewal();
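        // The hard limit is one hour; once it is exceeded the namenode may
        // already have recovered the lease, so stop renewing and close the
        // writers rather than keep retrying.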
820        if (elapsed > HdfsConstants.LEASE_HARDLIMIT_PERIOD) {
821          LOG.warn("Failed to renew lease for " + clientName + " for "
822              + (elapsed/1000) + " seconds (>= hard-limit ="
823              + (HdfsConstants.LEASE_HARDLIMIT_PERIOD/1000) + " seconds.) "
824              + "Closing all files being written ...", e);
825          closeAllFilesBeingWritten(true);
826        } else {
827          // Let the lease renewer handle it and retry.
828          throw e;
829        }
830      }
831    }
832    return false;
833  }
834  
835  /**
836   * Close connections to the Namenode.
837   */
838  void closeConnectionToNamenode() {
839    RPC.stopProxy(namenode);
840  }
841  
842  /** Abort and release resources held.  Ignore all errors. */
843  void abort() {
844    clientRunning = false;
845    closeAllFilesBeingWritten(true);
846    try {
847      // remove reference to this client and stop the renewer,
848      // if there are no more clients under the renewer.
849      getLeaseRenewer().closeClient(this);
850    } catch (IOException ioe) {
851      LOG.info("Exception occurred while aborting the client", ioe);
852    }
853    closeConnectionToNamenode();
854  }
855
856  /** Close/abort all files being written. */
857  private void closeAllFilesBeingWritten(final boolean abort) {
858    for(;;) {
859      final long inodeId;
860      final DFSOutputStream out;
861      synchronized(filesBeingWritten) {
862        if (filesBeingWritten.isEmpty()) {
863          return;
864        }
865        inodeId = filesBeingWritten.keySet().iterator().next();
866        out = filesBeingWritten.remove(inodeId);
867      }
868      if (out != null) {
869        try {
870          if (abort) {
871            out.abort();
872          } else {
873            out.close();
874          }
875        } catch(IOException ie) {
876          LOG.error("Failed to " + (abort? "abort": "close") +
877                  " inode " + inodeId, ie);
878        }
879      }
880    }
881  }
882
883  /**
884   * Close the file system, abandoning all of the leases and files being
885   * written, and close connections to the namenode.
886   */
887  @Override
888  public synchronized void close() throws IOException {
889    if(clientRunning) {
890      closeAllFilesBeingWritten(false);
891      clientRunning = false;
892      getLeaseRenewer().closeClient(this);
893      // close connections to the namenode
894      closeConnectionToNamenode();
895    }
896  }
897
898  /**
899   * Get the default block size for this cluster
900   * @return the default block size in bytes
901   */
902  public long getDefaultBlockSize() {
903    return dfsClientConf.defaultBlockSize;
904  }
905    
906  /**
907   * @see ClientProtocol#getPreferredBlockSize(String)
908   */
909  public long getBlockSize(String f) throws IOException {
910    try {
911      return namenode.getPreferredBlockSize(f);
912    } catch (IOException ie) {
913      LOG.warn("Problem getting block size", ie);
914      throw ie;
915    }
916  }
917
918  /**
919   * Get server default values for a number of configuration params.
920   * @see ClientProtocol#getServerDefaults()
921   */
922  public FsServerDefaults getServerDefaults() throws IOException {
923    long now = Time.now();
924    if (now - serverDefaultsLastUpdate > SERVER_DEFAULTS_VALIDITY_PERIOD) {
925      serverDefaults = namenode.getServerDefaults();
926      serverDefaultsLastUpdate = now;
927    }
928    return serverDefaults;
929  }
930  
931  /**
932   * Get a canonical token service name for this client's tokens.  Null should
933   * be returned if the client is not using tokens.
934   * @return the token service for the client
935   */
936  @InterfaceAudience.LimitedPrivate( { "HDFS" }) 
937  public String getCanonicalServiceName() {
938    return (dtService != null) ? dtService.toString() : null;
939  }
940  
941  /**
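   * A hedged usage sketch (assumes an open client instance; the renewer name
   * is illustrative):
   * <pre>
   *   Token&lt;DelegationTokenIdentifier&gt; t =
   *       dfsClient.getDelegationToken(new Text("renewerUser"));
   * </pre>
   *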
942   * @see ClientProtocol#getDelegationToken(Text)
943   */
944  public Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
945      throws IOException {
946    assert dtService != null;
947    Token<DelegationTokenIdentifier> token =
948      namenode.getDelegationToken(renewer);
949
950    if (token != null) {
951      token.setService(this.dtService);
952      LOG.info("Created " + DelegationTokenIdentifier.stringifyToken(token));
953    } else {
954      LOG.info("Cannot get delegation token from " + renewer);
955    }
956    return token;
957
958  }
959
960  /**
961   * Renew a delegation token
962   * @param token the token to renew
963   * @return the new expiration time
964   * @throws InvalidToken
965   * @throws IOException
966   * @deprecated Use Token.renew instead.
967   */
968  @Deprecated
969  public long renewDelegationToken(Token<DelegationTokenIdentifier> token)
970      throws InvalidToken, IOException {
971    LOG.info("Renewing " + DelegationTokenIdentifier.stringifyToken(token));
972    try {
973      return token.renew(conf);
974    } catch (InterruptedException ie) {                                       
975      throw new RuntimeException("caught interrupted", ie);
976    } catch (RemoteException re) {
977      throw re.unwrapRemoteException(InvalidToken.class,
978                                     AccessControlException.class);
979    }
980  }
981  
982  private static final Map<String, Boolean> localAddrMap = Collections
983      .synchronizedMap(new HashMap<String, Boolean>());
984  
985  public static boolean isLocalAddress(InetSocketAddress targetAddr) {
986    InetAddress addr = targetAddr.getAddress();
987    Boolean cached = localAddrMap.get(addr.getHostAddress());
988    if (cached != null) {
989      if (LOG.isTraceEnabled()) {
990        LOG.trace("Address " + targetAddr +
991                  (cached ? " is local" : " is not local"));
992      }
993      return cached;
994    }
995    
996    boolean local = NetUtils.isLocalAddress(addr);
997
998    if (LOG.isTraceEnabled()) {
999      LOG.trace("Address " + targetAddr +
1000                (local ? " is local" : " is not local"));
1001    }
1002    localAddrMap.put(addr.getHostAddress(), local);
1003    return local;
1004  }
1005  
1006  /**
1007   * Should the block access token be refetched on an exception
1008   * 
1009   * @param ex Exception received
1010   * @param targetAddr Target datanode address from where exception was received
1011   * @return true if the block access token has expired or is invalid and it
1012   *         should be refetched
1013   */
1014  private static boolean tokenRefetchNeeded(IOException ex,
1015      InetSocketAddress targetAddr) {
1016    /*
1017     * Get a new access token and retry. Retry is needed in 2 cases. 1) When
1018     * both NN and DN re-started while DFSClient holding a cached access token.
1019     * 2) In the case that NN fails to update its access key at pre-set interval
1020     * (by a wide margin) and subsequently restarts. In this case, DN
1021     * re-registers itself with NN and receives a new access key, but DN will
1022     * delete the old access key from its memory since it's considered expired
1023     * based on the estimated expiration date.
1024     */
1025    if (ex instanceof InvalidBlockTokenException || ex instanceof InvalidToken) {
1026      LOG.info("Access token was invalid when connecting to " + targetAddr
1027          + " : " + ex);
1028      return true;
1029    }
1030    return false;
1031  }
1032  
1033  /**
1034   * Cancel a delegation token
1035   * @param token the token to cancel
1036   * @throws InvalidToken
1037   * @throws IOException
1038   * @deprecated Use Token.cancel instead.
1039   */
1040  @Deprecated
1041  public void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
1042      throws InvalidToken, IOException {
1043    LOG.info("Cancelling " + DelegationTokenIdentifier.stringifyToken(token));
1044    try {
1045      token.cancel(conf);
1046    } catch (InterruptedException ie) {
1047      throw new RuntimeException("caught interrupted", ie);
1048    } catch (RemoteException re) {
1049      throw re.unwrapRemoteException(InvalidToken.class,
1050                                     AccessControlException.class);
1051    }
1052  }
1053  
1054  @InterfaceAudience.Private
1055  public static class Renewer extends TokenRenewer {
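    // Registered through the TokenRenewer service-loader mechanism: calls such
    // as token.renew(conf) on an HDFS delegation token are dispatched to this
    // class when handleKind() matches the token's kind.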
1056    
1057    static {
1058      //Ensure that HDFS Configuration files are loaded before trying to use
1059      // the renewer.
1060      HdfsConfiguration.init();
1061    }
1062    
1063    @Override
1064    public boolean handleKind(Text kind) {
1065      return DelegationTokenIdentifier.HDFS_DELEGATION_KIND.equals(kind);
1066    }
1067
1068    @SuppressWarnings("unchecked")
1069    @Override
1070    public long renew(Token<?> token, Configuration conf) throws IOException {
1071      Token<DelegationTokenIdentifier> delToken = 
1072        (Token<DelegationTokenIdentifier>) token;
1073      ClientProtocol nn = getNNProxy(delToken, conf);
1074      try {
1075        return nn.renewDelegationToken(delToken);
1076      } catch (RemoteException re) {
1077        throw re.unwrapRemoteException(InvalidToken.class, 
1078                                       AccessControlException.class);
1079      }
1080    }
1081
1082    @SuppressWarnings("unchecked")
1083    @Override
1084    public void cancel(Token<?> token, Configuration conf) throws IOException {
1085      Token<DelegationTokenIdentifier> delToken = 
1086          (Token<DelegationTokenIdentifier>) token;
1087      LOG.info("Cancelling " + 
1088               DelegationTokenIdentifier.stringifyToken(delToken));
1089      ClientProtocol nn = getNNProxy(delToken, conf);
1090      try {
1091        nn.cancelDelegationToken(delToken);
1092      } catch (RemoteException re) {
1093        throw re.unwrapRemoteException(InvalidToken.class,
1094            AccessControlException.class);
1095      }
1096    }
1097    
1098    private static ClientProtocol getNNProxy(
1099        Token<DelegationTokenIdentifier> token, Configuration conf)
1100        throws IOException {
1101      URI uri = HAUtil.getServiceUriFromToken(HdfsConstants.HDFS_URI_SCHEME,
1102              token);
1103      if (HAUtil.isTokenForLogicalUri(token) &&
1104          !HAUtil.isLogicalUri(conf, uri)) {
1105        // If the token is for a logical nameservice, but the configuration
1106        // we have disagrees about that, we can't actually renew it.
1107        // This can be the case in MR, for example, if the RM doesn't
1108        // have all of the HA clusters configured in its configuration.
1109        throw new IOException("Unable to map logical nameservice URI '" +
1110            uri + "' to a NameNode. Local configuration does not have " +
1111            "a failover proxy provider configured.");
1112      }
1113      
1114      NameNodeProxies.ProxyAndInfo<ClientProtocol> info =
1115        NameNodeProxies.createProxy(conf, uri, ClientProtocol.class);
1116      assert info.getDelegationTokenService().equals(token.getService()) :
1117        "Returned service '" + info.getDelegationTokenService().toString() +
1118        "' doesn't match expected service '" +
1119        token.getService().toString() + "'";
1120        
1121      return info.getProxy();
1122    }
1123
1124    @Override
1125    public boolean isManaged(Token<?> token) throws IOException {
1126      return true;
1127    }
1128    
1129  }
1130
1131  /**
1132   * Report corrupt blocks that were discovered by the client.
1133   * @see ClientProtocol#reportBadBlocks(LocatedBlock[])
1134   */
1135  public void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
1136    namenode.reportBadBlocks(blocks);
1137  }
1138  
1139  public short getDefaultReplication() {
1140    return dfsClientConf.defaultReplication;
1141  }
1142  
1143  public LocatedBlocks getLocatedBlocks(String src, long start)
1144      throws IOException {
1145    return getLocatedBlocks(src, start, dfsClientConf.prefetchSize);
1146  }
1147
1148  /*
1149   * This is just a wrapper around callGetBlockLocations, but non-static so that
1150   * we can stub it out for tests.
1151   */
1152  @VisibleForTesting
1153  public LocatedBlocks getLocatedBlocks(String src, long start, long length)
1154      throws IOException {
1155    return callGetBlockLocations(namenode, src, start, length);
1156  }
1157
1158  /**
1159   * @see ClientProtocol#getBlockLocations(String, long, long)
1160   */
1161  static LocatedBlocks callGetBlockLocations(ClientProtocol namenode,
1162      String src, long start, long length) 
1163      throws IOException {
1164    try {
1165      return namenode.getBlockLocations(src, start, length);
1166    } catch(RemoteException re) {
1167      throw re.unwrapRemoteException(AccessControlException.class,
1168                                     FileNotFoundException.class,
1169                                     UnresolvedPathException.class);
1170    }
1171  }
1172
1173  /**
1174   * Recover a file's lease
1175   * @param src a file's path
1176   * @return true if the file is already closed
1177   * @throws IOException
1178   */
1179  boolean recoverLease(String src) throws IOException {
1180    checkOpen();
1181
1182    try {
1183      return namenode.recoverLease(src, clientName);
1184    } catch (RemoteException re) {
1185      throw re.unwrapRemoteException(FileNotFoundException.class,
1186                                     AccessControlException.class,
1187                                     UnresolvedPathException.class);
1188    }
1189  }
1190
1191  /**
1192   * Get block location info about file
1193   * 
1194   * getBlockLocations() returns a list of hostnames that store 
1195   * data for a specific file region.  It returns a set of hostnames
1196   * for every block within the indicated region.
1197   *
1198   * This function is very useful when writing code that considers
1199   * data-placement when performing operations.  For example, the
1200   * MapReduce system tries to schedule tasks on the same machines
1201   * as the data-block the task processes. 
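   *
   * An illustrative call (the path and range are hypothetical):
   * <pre>
   *   BlockLocation[] locs =
   *       dfsClient.getBlockLocations("/user/example/part-00000", 0, 128L * 1024 * 1024);
   * </pre>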
1202   */
1203  public BlockLocation[] getBlockLocations(String src, long start, 
1204    long length) throws IOException, UnresolvedLinkException {
1205    LocatedBlocks blocks = getLocatedBlocks(src, start, length);
1206    BlockLocation[] locations =  DFSUtil.locatedBlocks2Locations(blocks);
1207    HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
1208    for (int i = 0; i < locations.length; i++) {
1209      hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
1210    }
1211    return hdfsLocations;
1212  }
1213  
1214  /**
1215   * Get block location information about a list of {@link HdfsBlockLocation}.
1216   * Used by {@link DistributedFileSystem#getFileBlockStorageLocations(List)} to
1217   * get {@link BlockStorageLocation}s for blocks returned by
1218   * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)}
1219   * .
1220   * 
1221   * This is done by making a round of RPCs to the associated datanodes,
1222   * asking for the volume of each block replica. The returned array of
1223   * {@link BlockStorageLocation} exposes this information as a
1224   * {@link VolumeId}.
1225   * 
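   * A hedged usage sketch (assumes the blocks-metadata feature is enabled on
   * both the client and the datanodes; inputs are hypothetical):
   * <pre>
   *   BlockLocation[] locs = dfsClient.getBlockLocations("/user/example/f", 0, 1024 * 1024);
   *   BlockStorageLocation[] volumeLocs =
   *       dfsClient.getBlockStorageLocations(Arrays.asList(locs));
   * </pre>
   *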
1226   * @param blockLocations
1227   *          target blocks on which to query volume location information
1228   * @return volumeBlockLocations original block array augmented with additional
1229   *         volume location information for each replica.
1230   */
1231  public BlockStorageLocation[] getBlockStorageLocations(
1232      List<BlockLocation> blockLocations) throws IOException,
1233      UnsupportedOperationException, InvalidBlockTokenException {
1234    if (!getConf().getHdfsBlocksMetadataEnabled) {
1235      throw new UnsupportedOperationException("Datanode-side support for " +
1236          "getVolumeBlockLocations() must also be enabled in the client " +
1237          "configuration.");
1238    }
1239    // Downcast blockLocations and fetch out required LocatedBlock(s)
1240    List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
1241    for (BlockLocation loc : blockLocations) {
1242      if (!(loc instanceof HdfsBlockLocation)) {
1243        throw new ClassCastException("DFSClient#getVolumeBlockLocations " +
1244            "expected to be passed HdfsBlockLocations");
1245      }
1246      HdfsBlockLocation hdfsLoc = (HdfsBlockLocation) loc;
1247      blocks.add(hdfsLoc.getLocatedBlock());
1248    }
1249    
1250    // Re-group the LocatedBlocks by datanode, mapping each datanode to the
1251    // list of LocatedBlocks stored on it.
1252    Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks = 
1253        new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
1254    for (LocatedBlock b : blocks) {
1255      for (DatanodeInfo info : b.getLocations()) {
1256        if (!datanodeBlocks.containsKey(info)) {
1257          datanodeBlocks.put(info, new ArrayList<LocatedBlock>());
1258        }
1259        List<LocatedBlock> l = datanodeBlocks.get(info);
1260        l.add(b);
1261      }
1262    }
1263        
1264    // Make RPCs to the datanodes to get volume locations for their replicas
1265    Map<DatanodeInfo, HdfsBlocksMetadata> metadatas = BlockStorageLocationUtil
1266        .queryDatanodesForHdfsBlocksMetadata(conf, datanodeBlocks,
1267            getConf().getFileBlockStorageLocationsNumThreads,
1268            getConf().getFileBlockStorageLocationsTimeoutMs,
1269            getConf().connectToDnViaHostname);
1270    
1271    if (LOG.isTraceEnabled()) {
1272      LOG.trace("metadata returned: "
1273          + Joiner.on("\n").withKeyValueSeparator("=").join(metadatas));
1274    }
1275    
1276    // Regroup the returned VolumeId metadata to again be grouped by
1277    // LocatedBlock rather than by datanode
1278    Map<LocatedBlock, List<VolumeId>> blockVolumeIds = BlockStorageLocationUtil
1279        .associateVolumeIdsWithBlocks(blocks, metadatas);
1280    
1281    // Combine original BlockLocations with new VolumeId information
1282    BlockStorageLocation[] volumeBlockLocations = BlockStorageLocationUtil
1283        .convertToVolumeBlockLocations(blocks, blockVolumeIds);
1284
1285    return volumeBlockLocations;
1286  }
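
  // Illustrative usage sketch (not part of this class). The client handle and
  // block locations below are placeholders, and BlockStorageLocation#getVolumeIds()
  // is assumed to be the accessor for the per-replica VolumeIds; the feature must
  // be enabled in the client configuration as checked above.
  //
  //   DFSClient client = ...;                 // an already-constructed client
  //   List<BlockLocation> blockLocs = ...;    // HdfsBlockLocations for some file
  //   BlockStorageLocation[] volumeLocs = client.getBlockStorageLocations(blockLocs);
  //   for (BlockStorageLocation loc : volumeLocs) {
  //     System.out.println(loc + " volumeIds=" + Arrays.toString(loc.getVolumeIds()));
  //   }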
1287  
1288  public DFSInputStream open(String src) 
1289      throws IOException, UnresolvedLinkException {
1290    return open(src, dfsClientConf.ioBufferSize, true, null);
1291  }
1292
1293  /**
1294   * Create an input stream that obtains a nodelist from the
1295   * namenode, and then reads from all the right places.  Creates
1296   * an inner subclass of InputStream that does the right out-of-band
1297   * work.
1298   * @deprecated Use {@link #open(String, int, boolean)} instead.
1299   */
1300  @Deprecated
1301  public DFSInputStream open(String src, int buffersize, boolean verifyChecksum,
1302                             FileSystem.Statistics stats)
1303      throws IOException, UnresolvedLinkException {
1304    return open(src, buffersize, verifyChecksum);
1305  }
1306  
1307
1308  /**
1309   * Create an input stream that obtains a nodelist from the
1310   * namenode, and then reads from all the right places.  Creates
1311   * an inner subclass of InputStream that does the right out-of-band
1312   * work.
1313   */
1314  public DFSInputStream open(String src, int buffersize, boolean verifyChecksum)
1315      throws IOException, UnresolvedLinkException {
1316    checkOpen();
1317    //    Get block info from namenode
1318    return new DFSInputStream(this, src, buffersize, verifyChecksum);
1319  }
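
  // Illustrative usage sketch (the client handle and path are placeholders).
  // DFSInputStream is a regular java.io.InputStream, so it can be read and
  // closed like any other stream.
  //
  //   DFSClient client = ...;
  //   DFSInputStream in = client.open("/user/example/data.txt");
  //   try {
  //     byte[] buf = new byte[4096];
  //     int n;
  //     while ((n = in.read(buf)) != -1) {
  //       // consume buf[0..n)
  //     }
  //   } finally {
  //     in.close();
  //   }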
1320
1321  /**
1322   * Get the namenode associated with this DFSClient object
1323   * @return the namenode associated with this DFSClient object
1324   */
1325  public ClientProtocol getNamenode() {
1326    return namenode;
1327  }
1328  
1329  /**
1330   * Call {@link #create(String, boolean, short, long, Progressable)} with
1331   * default <code>replication</code> and <code>blockSize</code> and null <code>
1332   * progress</code>.
1333   */
1334  public OutputStream create(String src, boolean overwrite) 
1335      throws IOException {
1336    return create(src, overwrite, dfsClientConf.defaultReplication,
1337        dfsClientConf.defaultBlockSize, null);
1338  }
1339    
1340  /**
1341   * Call {@link #create(String, boolean, short, long, Progressable)} with
1342   * default <code>replication</code> and <code>blockSize</code>.
1343   */
1344  public OutputStream create(String src, 
1345                             boolean overwrite,
1346                             Progressable progress) throws IOException {
1347    return create(src, overwrite, dfsClientConf.defaultReplication,
1348        dfsClientConf.defaultBlockSize, progress);
1349  }
1350    
1351  /**
1352   * Call {@link #create(String, boolean, short, long, Progressable)} with
1353   * null <code>progress</code>.
1354   */
1355  public OutputStream create(String src, 
1356                             boolean overwrite, 
1357                             short replication,
1358                             long blockSize) throws IOException {
1359    return create(src, overwrite, replication, blockSize, null);
1360  }
1361
1362  /**
1363   * Call {@link #create(String, boolean, short, long, Progressable, int)}
1364   * with default bufferSize.
1365   */
1366  public OutputStream create(String src, boolean overwrite, short replication,
1367      long blockSize, Progressable progress) throws IOException {
1368    return create(src, overwrite, replication, blockSize, progress,
1369        dfsClientConf.ioBufferSize);
1370  }
1371
1372  /**
1373   * Call {@link #create(String, FsPermission, EnumSet, short, long, 
1374   * Progressable, int, ChecksumOpt)} with default <code>permission</code>
1375   * {@link FsPermission#getFileDefault()}.
1376   * 
1377   * @param src File name
1378   * @param overwrite overwrite an existing file if true
1379   * @param replication replication factor for the file
1380   * @param blockSize maximum block size
1381   * @param progress interface for reporting client progress
1382   * @param buffersize underlying buffersize
1383   * 
1384   * @return output stream
1385   */
1386  public OutputStream create(String src,
1387                             boolean overwrite,
1388                             short replication,
1389                             long blockSize,
1390                             Progressable progress,
1391                             int buffersize)
1392      throws IOException {
1393    return create(src, FsPermission.getFileDefault(),
1394        overwrite ? EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE)
1395            : EnumSet.of(CreateFlag.CREATE), replication, blockSize, progress,
1396        buffersize, null);
1397  }
1398
1399  /**
1400   * Call {@link #create(String, FsPermission, EnumSet, boolean, short, 
1401   * long, Progressable, int, ChecksumOpt)} with <code>createParent</code>
1402   *  set to true.
1403   */
1404  public DFSOutputStream create(String src, 
1405                             FsPermission permission,
1406                             EnumSet<CreateFlag> flag, 
1407                             short replication,
1408                             long blockSize,
1409                             Progressable progress,
1410                             int buffersize,
1411                             ChecksumOpt checksumOpt)
1412      throws IOException {
1413    return create(src, permission, flag, true,
1414        replication, blockSize, progress, buffersize, checksumOpt, null);
1415  }
1416
1417  /**
1418   * Create a new dfs file with the specified block replication 
1419   * with write-progress reporting and return an output stream for writing
1420   * into the file.  
1421   * 
1422   * @param src File name
1423   * @param permission The permission of the file being created.
1424   *          If null, use default permission {@link FsPermission#getFileDefault()}
1425   * @param flag indicates create a new file or create/overwrite an
1426   *          existing file or append to an existing file
1427   * @param createParent create missing parent directory if true
1428   * @param replication block replication
1429   * @param blockSize maximum block size
1430   * @param progress interface for reporting client progress
1431   * @param buffersize underlying buffer size 
1432   * @param checksumOpt checksum options
1433   * 
1434   * @return output stream
1435   * 
1436   * @see ClientProtocol#create(String, FsPermission, String, EnumSetWritable,
1437   * boolean, short, long) for detailed description of exceptions thrown
1438   */
1439  public DFSOutputStream create(String src, 
1440                             FsPermission permission,
1441                             EnumSet<CreateFlag> flag, 
1442                             boolean createParent,
1443                             short replication,
1444                             long blockSize,
1445                             Progressable progress,
1446                             int buffersize,
1447                             ChecksumOpt checksumOpt) throws IOException {
1448    return create(src, permission, flag, createParent, replication, blockSize, 
1449        progress, buffersize, checksumOpt, null);
1450  }
1451
1452  /**
1453   * Same as {@link #create(String, FsPermission, EnumSet, boolean, short, long,
1454   * Progressable, int, ChecksumOpt)} with the addition of favoredNodes that is
1455   * a hint to where the namenode should place the file blocks.
1456   * The favored nodes hint is not persisted in HDFS, so it may be honored only
1457   * at creation time; HDFS could later move the blocks away from the favored
1458   * nodes during balancing or re-replication. A value of null means no favored
1459   * nodes for this create.
1460   */
1461  public DFSOutputStream create(String src, 
1462                             FsPermission permission,
1463                             EnumSet<CreateFlag> flag, 
1464                             boolean createParent,
1465                             short replication,
1466                             long blockSize,
1467                             Progressable progress,
1468                             int buffersize,
1469                             ChecksumOpt checksumOpt,
1470                             InetSocketAddress[] favoredNodes) throws IOException {
1471    checkOpen();
1472    if (permission == null) {
1473      permission = FsPermission.getFileDefault();
1474    }
1475    FsPermission masked = permission.applyUMask(dfsClientConf.uMask);
1476    if(LOG.isDebugEnabled()) {
1477      LOG.debug(src + ": masked=" + masked);
1478    }
1479    String[] favoredNodeStrs = null;
1480    if (favoredNodes != null) {
1481      favoredNodeStrs = new String[favoredNodes.length];
1482      for (int i = 0; i < favoredNodes.length; i++) {
1483        favoredNodeStrs[i] = 
1484            favoredNodes[i].getHostName() + ":" 
1485                         + favoredNodes[i].getPort();
1486      }
1487    }
1488    final DFSOutputStream result = DFSOutputStream.newStreamForCreate(this,
1489        src, masked, flag, createParent, replication, blockSize, progress,
1490        buffersize, dfsClientConf.createChecksum(checksumOpt), favoredNodeStrs);
1491    beginFileLease(result.getFileId(), result);
1492    return result;
1493  }
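
  // Illustrative usage sketch (path, replication, block size and the datanode
  // address are arbitrary example values). The favored-nodes argument is only
  // a best-effort hint, as described in the javadoc above.
  //
  //   DFSClient client = ...;
  //   InetSocketAddress[] favored = {
  //       new InetSocketAddress("dn1.example.com", 50010) };  // hypothetical datanode
  //   DFSOutputStream out = client.create("/user/example/out.bin",
  //       FsPermission.getFileDefault(),
  //       EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
  //       true,                  // createParent
  //       (short) 3,             // replication
  //       128L * 1024 * 1024,    // blockSize
  //       null,                  // progress
  //       4096,                  // buffersize
  //       null,                  // checksumOpt: use the configured default
  //       favored);
  //   try {
  //     out.write(data);         // data is a byte[] to be written
  //   } finally {
  //     out.close();
  //   }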
1494  
1495  /**
1496   * Append to an existing file if {@link CreateFlag#APPEND} is present
1497   */
1498  private DFSOutputStream primitiveAppend(String src, EnumSet<CreateFlag> flag,
1499      int buffersize, Progressable progress) throws IOException {
1500    if (flag.contains(CreateFlag.APPEND)) {
1501      HdfsFileStatus stat = getFileInfo(src);
1502      if (stat == null) { // No file to append to
1503        // New file needs to be created if create option is present
1504        if (!flag.contains(CreateFlag.CREATE)) {
1505          throw new FileNotFoundException("failed to append to non-existent file "
1506              + src + " on client " + clientName);
1507        }
1508        return null;
1509      }
1510      return callAppend(stat, src, buffersize, progress);
1511    }
1512    return null;
1513  }
1514  
1515  /**
1516   * Same as {@link #create(String, FsPermission, EnumSet, short, long,
1517   *  Progressable, int, ChecksumOpt)} except that the permission
1518   *  is absolute (i.e. it has already been masked with the umask).
1519   */
1520  public DFSOutputStream primitiveCreate(String src, 
1521                             FsPermission absPermission,
1522                             EnumSet<CreateFlag> flag,
1523                             boolean createParent,
1524                             short replication,
1525                             long blockSize,
1526                             Progressable progress,
1527                             int buffersize,
1528                             ChecksumOpt checksumOpt)
1529      throws IOException, UnresolvedLinkException {
1530    checkOpen();
1531    CreateFlag.validate(flag);
1532    DFSOutputStream result = primitiveAppend(src, flag, buffersize, progress);
1533    if (result == null) {
1534      DataChecksum checksum = dfsClientConf.createChecksum(checksumOpt);
1535      result = DFSOutputStream.newStreamForCreate(this, src, absPermission,
1536          flag, createParent, replication, blockSize, progress, buffersize,
1537          checksum);
1538    }
1539    beginFileLease(result.getFileId(), result);
1540    return result;
1541  }
1542  
1543  /**
1544   * Creates a symbolic link.
1545   * 
1546   * @see ClientProtocol#createSymlink(String, String,FsPermission, boolean) 
1547   */
1548  public void createSymlink(String target, String link, boolean createParent)
1549      throws IOException {
1550    try {
1551      FsPermission dirPerm = 
1552          FsPermission.getDefault().applyUMask(dfsClientConf.uMask); 
1553      namenode.createSymlink(target, link, dirPerm, createParent);
1554    } catch (RemoteException re) {
1555      throw re.unwrapRemoteException(AccessControlException.class,
1556                                     FileAlreadyExistsException.class, 
1557                                     FileNotFoundException.class,
1558                                     ParentNotDirectoryException.class,
1559                                     NSQuotaExceededException.class, 
1560                                     DSQuotaExceededException.class,
1561                                     UnresolvedPathException.class,
1562                                     SnapshotAccessControlException.class);
1563    }
1564  }
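
  // Illustrative usage sketch (paths are placeholders): create a link
  // /user/example/latest pointing at /user/example/releases/v2, creating any
  // missing parent directories of the link.
  //
  //   DFSClient client = ...;
  //   client.createSymlink("/user/example/releases/v2", "/user/example/latest", true);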
1565
1566  /**
1567   * Resolve the *first* symlink, if any, in the path.
1568   * 
1569   * @see ClientProtocol#getLinkTarget(String)
1570   */
1571  public String getLinkTarget(String path) throws IOException { 
1572    checkOpen();
1573    try {
1574      return namenode.getLinkTarget(path);
1575    } catch (RemoteException re) {
1576      throw re.unwrapRemoteException(AccessControlException.class,
1577                                     FileNotFoundException.class);
1578    }
1579  }
1580
1581  /** Method to get stream returned by append call */
1582  private DFSOutputStream callAppend(HdfsFileStatus stat, String src,
1583      int buffersize, Progressable progress) throws IOException {
1584    LocatedBlock lastBlock = null;
1585    try {
1586      lastBlock = namenode.append(src, clientName);
1587    } catch(RemoteException re) {
1588      throw re.unwrapRemoteException(AccessControlException.class,
1589                                     FileNotFoundException.class,
1590                                     SafeModeException.class,
1591                                     DSQuotaExceededException.class,
1592                                     UnsupportedOperationException.class,
1593                                     UnresolvedPathException.class,
1594                                     SnapshotAccessControlException.class);
1595    }
1596    return DFSOutputStream.newStreamForAppend(this, src, buffersize, progress,
1597        lastBlock, stat, dfsClientConf.createChecksum());
1598  }
1599  
1600  /**
1601   * Append to an existing HDFS file.  
1602   * 
1603   * @param src file name
1604   * @param buffersize buffer size
1605   * @param progress for reporting write-progress; null is acceptable.
1606   * @param statistics file system statistics; null is acceptable.
1607   * @return an output stream for writing into the file
1608   * 
1609   * @see ClientProtocol#append(String, String) 
1610   */
1611  public HdfsDataOutputStream append(final String src, final int buffersize,
1612      final Progressable progress, final FileSystem.Statistics statistics
1613      ) throws IOException {
1614    final DFSOutputStream out = append(src, buffersize, progress);
1615    return new HdfsDataOutputStream(out, statistics, out.getInitialLen());
1616  }
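
  // Illustrative usage sketch (path and buffer size are example values; the
  // progress and statistics arguments may be null as documented above).
  //
  //   DFSClient client = ...;
  //   HdfsDataOutputStream out =
  //       client.append("/user/example/log.txt", 4096, null, null);
  //   try {
  //     out.write("another line\n".getBytes("UTF-8"));
  //   } finally {
  //     out.close();
  //   }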
1617
1618  private DFSOutputStream append(String src, int buffersize, Progressable progress) 
1619      throws IOException {
1620    checkOpen();
1621    HdfsFileStatus stat = getFileInfo(src);
1622    if (stat == null) { // No file found
1623      throw new FileNotFoundException("failed to append to non-existent file "
1624          + src + " on client " + clientName);
1625    }
1626    final DFSOutputStream result = callAppend(stat, src, buffersize, progress);
1627    beginFileLease(result.getFileId(), result);
1628    return result;
1629  }
1630
1631  /**
1632   * Set replication for an existing file.
1633   * @param src file name
1634   * @param replication replication to set the file to
1635   * 
1636   * @see ClientProtocol#setReplication(String, short)
1637   */
1638  public boolean setReplication(String src, short replication)
1639      throws IOException {
1640    try {
1641      return namenode.setReplication(src, replication);
1642    } catch(RemoteException re) {
1643      throw re.unwrapRemoteException(AccessControlException.class,
1644                                     FileNotFoundException.class,
1645                                     SafeModeException.class,
1646                                     DSQuotaExceededException.class,
1647                                     UnresolvedPathException.class,
1648                                     SnapshotAccessControlException.class);
1649    }
1650  }
1651
1652  /**
1653   * Rename file or directory.
1654   * @see ClientProtocol#rename(String, String)
1655   * @deprecated Use {@link #rename(String, String, Options.Rename...)} instead.
1656   */
1657  @Deprecated
1658  public boolean rename(String src, String dst) throws IOException {
1659    checkOpen();
1660    try {
1661      return namenode.rename(src, dst);
1662    } catch(RemoteException re) {
1663      throw re.unwrapRemoteException(AccessControlException.class,
1664                                     NSQuotaExceededException.class,
1665                                     DSQuotaExceededException.class,
1666                                     UnresolvedPathException.class,
1667                                     SnapshotAccessControlException.class);
1668    }
1669  }
1670
1671  /**
1672   * Move the blocks in the source files to the target file and delete the sources.
1673   * See {@link ClientProtocol#concat(String, String [])}. 
1674   */
1675  public void concat(String trg, String [] srcs) throws IOException {
1676    checkOpen();
1677    try {
1678      namenode.concat(trg, srcs);
1679    } catch(RemoteException re) {
1680      throw re.unwrapRemoteException(AccessControlException.class,
1681                                     UnresolvedPathException.class,
1682                                     SnapshotAccessControlException.class);
1683    }
1684  }
1685  /**
1686   * Rename file or directory.
1687   * @see ClientProtocol#rename2(String, String, Options.Rename...)
1688   */
1689  public void rename(String src, String dst, Options.Rename... options)
1690      throws IOException {
1691    checkOpen();
1692    try {
1693      namenode.rename2(src, dst, options);
1694    } catch(RemoteException re) {
1695      throw re.unwrapRemoteException(AccessControlException.class,
1696                                     DSQuotaExceededException.class,
1697                                     FileAlreadyExistsException.class,
1698                                     FileNotFoundException.class,
1699                                     ParentNotDirectoryException.class,
1700                                     SafeModeException.class,
1701                                     NSQuotaExceededException.class,
1702                                     UnresolvedPathException.class,
1703                                     SnapshotAccessControlException.class);
1704    }
1705  }
1706  /**
1707   * Delete file or directory.
1708   * See {@link ClientProtocol#delete(String, boolean)}. 
1709   */
1710  @Deprecated
1711  public boolean delete(String src) throws IOException {
1712    checkOpen();
1713    return namenode.delete(src, true);
1714  }
1715
1716  /**
1717   * Delete file or directory.
1718   * Deletes the contents of the directory if it is non-empty and
1719   * <code>recursive</code> is set to true.
1720   *
1721   * @see ClientProtocol#delete(String, boolean)
1722   */
1723  public boolean delete(String src, boolean recursive) throws IOException {
1724    checkOpen();
1725    try {
1726      return namenode.delete(src, recursive);
1727    } catch(RemoteException re) {
1728      throw re.unwrapRemoteException(AccessControlException.class,
1729                                     FileNotFoundException.class,
1730                                     SafeModeException.class,
1731                                     UnresolvedPathException.class,
1732                                     SnapshotAccessControlException.class);
1733    }
1734  }
1735  
1736  /** Check whether a file or directory exists. Implemented using getFileInfo(src).
1737   */
1738  public boolean exists(String src) throws IOException {
1739    checkOpen();
1740    return getFileInfo(src) != null;
1741  }
1742
1743  /**
1744   * Get a partial listing of the indicated directory.
1745   * No block locations need to be fetched.
1746   */
1747  public DirectoryListing listPaths(String src,  byte[] startAfter)
1748    throws IOException {
1749    return listPaths(src, startAfter, false);
1750  }
1751  
1752  /**
1753   * Get a partial listing of the indicated directory.
1754   *
1755   * It is recommended to use HdfsFileStatus.EMPTY_NAME as startAfter
1756   * if the application wants to fetch a listing starting from
1757   * the first entry in the directory.
1758   *
1759   * @see ClientProtocol#getListing(String, byte[], boolean)
1760   */
1761  public DirectoryListing listPaths(String src,  byte[] startAfter,
1762      boolean needLocation) 
1763    throws IOException {
1764    checkOpen();
1765    try {
1766      return namenode.getListing(src, startAfter, needLocation);
1767    } catch(RemoteException re) {
1768      throw re.unwrapRemoteException(AccessControlException.class,
1769                                     FileNotFoundException.class,
1770                                     UnresolvedPathException.class);
1771    }
1772  }
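
  // Illustrative pagination sketch (client handle and path are placeholders;
  // DirectoryListing#getPartialListing(), #hasMore() and #getLastName() are
  // assumed to be available as elsewhere in the HDFS client code).
  //
  //   DFSClient client = ...;
  //   byte[] cookie = HdfsFileStatus.EMPTY_NAME;   // start from the first entry
  //   while (true) {
  //     DirectoryListing page = client.listPaths("/user/example", cookie, false);
  //     if (page == null) {
  //       break;                                    // directory does not exist
  //     }
  //     for (HdfsFileStatus status : page.getPartialListing()) {
  //       System.out.println(status.getLocalName());
  //     }
  //     if (!page.hasMore()) {
  //       break;
  //     }
  //     cookie = page.getLastName();
  //   }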
1773
1774  /**
1775   * Get the file info for a specific file or directory.
1776   * @param src The string representation of the path to the file
1777   * @return object containing information regarding the file
1778   *         or null if file not found
1779   *         
1780   * @see ClientProtocol#getFileInfo(String) for description of exceptions
1781   */
1782  public HdfsFileStatus getFileInfo(String src) throws IOException {
1783    checkOpen();
1784    try {
1785      return namenode.getFileInfo(src);
1786    } catch(RemoteException re) {
1787      throw re.unwrapRemoteException(AccessControlException.class,
1788                                     FileNotFoundException.class,
1789                                     UnresolvedPathException.class);
1790    }
1791  }
1792  
1793  /**
1794   * Get the close status of a file.
1795   * @return true if the file is already closed
1796   */
1797  public boolean isFileClosed(String src) throws IOException{
1798    checkOpen();
1799    try {
1800      return namenode.isFileClosed(src);
1801    } catch(RemoteException re) {
1802      throw re.unwrapRemoteException(AccessControlException.class,
1803                                     FileNotFoundException.class,
1804                                     UnresolvedPathException.class);
1805    }
1806  }
1807  
1808  /**
1809   * Get the file info for a specific file or directory. If src
1810   * refers to a symlink then the FileStatus of the link is returned.
1811   * @param src path to a file or directory.
1812   * 
1813   * For description of exceptions thrown 
1814   * @see ClientProtocol#getFileLinkInfo(String)
1815   */
1816  public HdfsFileStatus getFileLinkInfo(String src) throws IOException {
1817    checkOpen();
1818    try {
1819      return namenode.getFileLinkInfo(src);
1820    } catch(RemoteException re) {
1821      throw re.unwrapRemoteException(AccessControlException.class,
1822                                     UnresolvedPathException.class);
1823    }
1824  }
1825
1826  /**
1827   * Get the checksum of the whole file or a range of the file. Note that the
1828   * range always starts from the beginning of the file.
1829   * @param src The file path
1830   * @param length The length of the range
1831   * @return The checksum 
1832   * @see DistributedFileSystem#getFileChecksum(Path)
1833   */
1834  public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length)
1835      throws IOException {
1836    checkOpen();
1837    Preconditions.checkArgument(length >= 0);
1838    return getFileChecksum(src, length, clientName, namenode,
1839        socketFactory, dfsClientConf.socketTimeout, getDataEncryptionKey(),
1840        dfsClientConf.connectToDnViaHostname);
1841  }
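
  // Illustrative usage sketch (path is a placeholder). Passing the file's full
  // length checksums the whole file; a smaller value checksums only the leading
  // range of the file, as noted above.
  //
  //   DFSClient client = ...;
  //   long len = client.getFileInfo("/user/example/data.txt").getLen();
  //   MD5MD5CRC32FileChecksum checksum =
  //       client.getFileChecksum("/user/example/data.txt", len);
  //   System.out.println("checksum: " + checksum);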
1842  
1843  @InterfaceAudience.Private
1844  public void clearDataEncryptionKey() {
1845    LOG.debug("Clearing encryption key");
1846    synchronized (this) {
1847      encryptionKey = null;
1848    }
1849  }
1850  
1851  /**
1852   * @return true if data sent between this client and DNs should be encrypted,
1853   *         false otherwise.
1854   * @throws IOException in the event of error communicating with the NN
1855   */
1856  boolean shouldEncryptData() throws IOException {
1857    FsServerDefaults d = getServerDefaults();
1858    return d == null ? false : d.getEncryptDataTransfer();
1859  }
1860  
1861  @InterfaceAudience.Private
1862  public DataEncryptionKey getDataEncryptionKey()
1863      throws IOException {
1864    if (shouldEncryptData() && 
1865        !this.trustedChannelResolver.isTrusted()) {
1866      synchronized (this) {
1867        if (encryptionKey == null ||
1868            encryptionKey.expiryDate < Time.now()) {
1869          LOG.debug("Getting new encryption token from NN");
1870          encryptionKey = namenode.getDataEncryptionKey();
1871        }
1872        return encryptionKey;
1873      }
1874    } else {
1875      return null;
1876    }
1877  }
1878
1879  /**
1880   * Get the checksum of the whole file or a range of the file.
1881   * @param src The file path
1882   * @param length the length of the range, i.e., the range is [0, length]
1883   * @param clientName the name of the client requesting the checksum.
1884   * @param namenode the RPC proxy for the namenode
1885   * @param socketFactory to create sockets to connect to DNs
1886   * @param socketTimeout timeout to use when connecting and waiting for a response
1887   * @param encryptionKey the key needed to communicate with DNs in this cluster
1888   * @param connectToDnViaHostname whether the client should use hostnames instead of IPs
1889   * @return The checksum 
1890   */
1891  private static MD5MD5CRC32FileChecksum getFileChecksum(String src,
1892      long length, String clientName, ClientProtocol namenode,
1893      SocketFactory socketFactory, int socketTimeout,
1894      DataEncryptionKey encryptionKey, boolean connectToDnViaHostname)
1895      throws IOException {
1896    //get block locations for the file range
1897    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0,
1898        length);
1899    if (null == blockLocations) {
1900      throw new FileNotFoundException("File does not exist: " + src);
1901    }
1902    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
1903    final DataOutputBuffer md5out = new DataOutputBuffer();
1904    int bytesPerCRC = -1;
1905    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
1906    long crcPerBlock = 0;
1907    boolean refetchBlocks = false;
1908    int lastRetriedIndex = -1;
1909
1910    // get block checksum for each block
1911    long remaining = length;
1912    for(int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
1913      if (refetchBlocks) {  // refetch to get fresh tokens
1914        blockLocations = callGetBlockLocations(namenode, src, 0, length);
1915        if (null == blockLocations) {
1916          throw new FileNotFoundException("File does not exist: " + src);
1917        }
1918        locatedblocks = blockLocations.getLocatedBlocks();
1919        refetchBlocks = false;
1920      }
1921      LocatedBlock lb = locatedblocks.get(i);
1922      final ExtendedBlock block = lb.getBlock();
1923      if (remaining < block.getNumBytes()) {
1924        block.setNumBytes(remaining);
1925      }
1926      remaining -= block.getNumBytes();
1927      final DatanodeInfo[] datanodes = lb.getLocations();
1928      
1929      //try each datanode location of the block
1930      final int timeout = 3000 * datanodes.length + socketTimeout;
1931      boolean done = false;
1932      for(int j = 0; !done && j < datanodes.length; j++) {
1933        DataOutputStream out = null;
1934        DataInputStream in = null;
1935        
1936        try {
1937          //connect to a datanode
1938          IOStreamPair pair = connectToDN(socketFactory, connectToDnViaHostname,
1939              encryptionKey, datanodes[j], timeout);
1940          out = new DataOutputStream(new BufferedOutputStream(pair.out,
1941              HdfsConstants.SMALL_BUFFER_SIZE));
1942          in = new DataInputStream(pair.in);
1943
1944          if (LOG.isDebugEnabled()) {
1945            LOG.debug("write to " + datanodes[j] + ": "
1946                + Op.BLOCK_CHECKSUM + ", block=" + block);
1947          }
1948          // get block MD5
1949          new Sender(out).blockChecksum(block, lb.getBlockToken());
1950
1951          final BlockOpResponseProto reply =
1952            BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
1953
1954          if (reply.getStatus() != Status.SUCCESS) {
1955            if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN) {
1956              throw new InvalidBlockTokenException();
1957            } else {
1958              throw new IOException("Bad response " + reply + " for block "
1959                  + block + " from datanode " + datanodes[j]);
1960            }
1961          }
1962          
1963          OpBlockChecksumResponseProto checksumData =
1964            reply.getChecksumResponse();
1965
1966          //read byte-per-checksum
1967          final int bpc = checksumData.getBytesPerCrc();
1968          if (i == 0) { //first block
1969            bytesPerCRC = bpc;
1970          }
1971          else if (bpc != bytesPerCRC) {
1972            throw new IOException("Byte-per-checksum not matched: bpc=" + bpc
1973                + " but bytesPerCRC=" + bytesPerCRC);
1974          }
1975          
1976          //read crc-per-block
1977          final long cpb = checksumData.getCrcPerBlock();
1978          if (locatedblocks.size() > 1 && i == 0) {
1979            crcPerBlock = cpb;
1980          }
1981
1982          //read md5
1983          final MD5Hash md5 = new MD5Hash(
1984              checksumData.getMd5().toByteArray());
1985          md5.write(md5out);
1986          
1987          // read crc-type
1988          final DataChecksum.Type ct;
1989          if (checksumData.hasCrcType()) {
1990            ct = PBHelper.convert(checksumData
1991                .getCrcType());
1992          } else {
1993            LOG.debug("Retrieving checksum from an earlier-version DataNode: " +
1994                      "inferring checksum by reading first byte");
1995            ct = inferChecksumTypeByReading(
1996                clientName, socketFactory, socketTimeout, lb, datanodes[j],
1997                encryptionKey, connectToDnViaHostname);
1998          }
1999
2000          if (i == 0) { // first block
2001            crcType = ct;
2002          } else if (crcType != DataChecksum.Type.MIXED
2003              && crcType != ct) {
2004            // if crc types are mixed in a file
2005            crcType = DataChecksum.Type.MIXED;
2006          }
2007
2008          done = true;
2009
2010          if (LOG.isDebugEnabled()) {
2011            if (i == 0) {
2012              LOG.debug("set bytesPerCRC=" + bytesPerCRC
2013                  + ", crcPerBlock=" + crcPerBlock);
2014            }
2015            LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
2016          }
2017        } catch (InvalidBlockTokenException ibte) {
2018          if (i > lastRetriedIndex) {
2019            if (LOG.isDebugEnabled()) {
2020              LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM "
2021                  + "for file " + src + " for block " + block
2022                  + " from datanode " + datanodes[j]
2023                  + ". Will retry the block once.");
2024            }
2025            lastRetriedIndex = i;
2026            done = true; // actually it's not done; but we'll retry
2027            i--; // repeat at i-th block
2028            refetchBlocks = true;
2029            break;
2030          }
2031        } catch (IOException ie) {
2032          LOG.warn("src=" + src + ", datanodes["+j+"]=" + datanodes[j], ie);
2033        } finally {
2034          IOUtils.closeStream(in);
2035          IOUtils.closeStream(out);
2036        }
2037      }
2038
2039      if (!done) {
2040        throw new IOException("Fail to get block MD5 for " + block);
2041      }
2042    }
2043
2044    //compute file MD5
2045    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData()); 
2046    switch (crcType) {
2047      case CRC32:
2048        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC,
2049            crcPerBlock, fileMD5);
2050      case CRC32C:
2051        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC,
2052            crcPerBlock, fileMD5);
2053      default:
2054        // If there is no block allocated for the file,
2055        // return one with the magic entry that matches what previous
2056        // hdfs versions return.
2057        if (locatedblocks.size() == 0) {
2058          return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
2059        }
2060
2061        // we should never get here since the validity was checked
2062        // when getCrcType() was called above.
2063        return null;
2064    }
2065  }
2066
2067  /**
2068   * Connect to the given datanode's data transfer port, and return
2069   * the resulting IOStreamPair. This includes encryption wrapping, etc.
2070   */
2071  private static IOStreamPair connectToDN(
2072      SocketFactory socketFactory, boolean connectToDnViaHostname,
2073      DataEncryptionKey encryptionKey, DatanodeInfo dn, int timeout)
2074      throws IOException
2075  {
2076    boolean success = false;
2077    Socket sock = null;
2078    try {
2079      sock = socketFactory.createSocket();
2080      String dnAddr = dn.getXferAddr(connectToDnViaHostname);
2081      if (LOG.isDebugEnabled()) {
2082        LOG.debug("Connecting to datanode " + dnAddr);
2083      }
2084      NetUtils.connect(sock, NetUtils.createSocketAddr(dnAddr), timeout);
2085      sock.setSoTimeout(timeout);
2086  
2087      OutputStream unbufOut = NetUtils.getOutputStream(sock);
2088      InputStream unbufIn = NetUtils.getInputStream(sock);
2089      IOStreamPair ret;
2090      if (encryptionKey != null) {
2091        ret = DataTransferEncryptor.getEncryptedStreams(
2092                unbufOut, unbufIn, encryptionKey);
2093      } else {
2094        ret = new IOStreamPair(unbufIn, unbufOut);        
2095      }
2096      success = true;
2097      return ret;
2098    } finally {
2099      if (!success) {
2100        IOUtils.closeSocket(sock);
2101      }
2102    }
2103  }
2104  
2105  /**
2106   * Infer the checksum type for a replica by sending an OP_READ_BLOCK
2107   * for the first byte of that replica. This is used for compatibility
2108   * with older HDFS versions which did not include the checksum type in
2109   * OpBlockChecksumResponseProto.
2110   * @param clientName the name of the DFSClient requesting the checksum
2111   * @param socketFactory used to create sockets to connect to the datanode
2112   * @param socketTimeout timeout to use when connecting and waiting for a response
2113   * @param lb the located block
2114   * @param dn the datanode to read from
2115   * @param encryptionKey the key needed to communicate with DNs in this cluster
2116   * @param connectToDnViaHostname whether the client should use hostnames instead of IPs
2117   * @return the inferred checksum type
2118   */
2119  private static Type inferChecksumTypeByReading(
2120      String clientName, SocketFactory socketFactory, int socketTimeout,
2121      LocatedBlock lb, DatanodeInfo dn,
2122      DataEncryptionKey encryptionKey, boolean connectToDnViaHostname)
2123      throws IOException {
2124    IOStreamPair pair = connectToDN(socketFactory, connectToDnViaHostname,
2125        encryptionKey, dn, socketTimeout);
2126
2127    try {
2128      DataOutputStream out = new DataOutputStream(new BufferedOutputStream(pair.out,
2129          HdfsConstants.SMALL_BUFFER_SIZE));
2130      DataInputStream in = new DataInputStream(pair.in);
2131  
2132      new Sender(out).readBlock(lb.getBlock(), lb.getBlockToken(), clientName,
2133          0, 1, true, CachingStrategy.newDefaultStrategy());
2134      final BlockOpResponseProto reply =
2135          BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
2136      
2137      if (reply.getStatus() != Status.SUCCESS) {
2138        if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN) {
2139          throw new InvalidBlockTokenException();
2140        } else {
2141          throw new IOException("Bad response " + reply + " trying to read "
2142              + lb.getBlock() + " from datanode " + dn);
2143        }
2144      }
2145      
2146      return PBHelper.convert(reply.getReadOpChecksumInfo().getChecksum().getType());
2147    } finally {
2148      IOUtils.cleanup(null, pair.in, pair.out);
2149    }
2150  }
2151
2152  /**
2153   * Set permissions to a file or directory.
2154   * @param src path name.
2155   * @param permission permission to set to
2156   * 
2157   * @see ClientProtocol#setPermission(String, FsPermission)
2158   */
2159  public void setPermission(String src, FsPermission permission)
2160      throws IOException {
2161    checkOpen();
2162    try {
2163      namenode.setPermission(src, permission);
2164    } catch(RemoteException re) {
2165      throw re.unwrapRemoteException(AccessControlException.class,
2166                                     FileNotFoundException.class,
2167                                     SafeModeException.class,
2168                                     UnresolvedPathException.class,
2169                                     SnapshotAccessControlException.class);
2170    }
2171  }
2172
2173  /**
2174   * Set file or directory owner.
2175   * @param src path name.
2176   * @param username user id.
2177   * @param groupname user group.
2178   * 
2179   * @see ClientProtocol#setOwner(String, String, String)
2180   */
2181  public void setOwner(String src, String username, String groupname)
2182      throws IOException {
2183    checkOpen();
2184    try {
2185      namenode.setOwner(src, username, groupname);
2186    } catch(RemoteException re) {
2187      throw re.unwrapRemoteException(AccessControlException.class,
2188                                     FileNotFoundException.class,
2189                                     SafeModeException.class,
2190                                     UnresolvedPathException.class,
2191                                     SnapshotAccessControlException.class);                                   
2192    }
2193  }
2194
2195  /**
2196   * @see ClientProtocol#getStats()
2197   */
2198  public FsStatus getDiskStatus() throws IOException {
2199    long[] rawNums = namenode.getStats();
2200    return new FsStatus(rawNums[0], rawNums[1], rawNums[2]);
2201  }
2202
2203  /**
2204   * Returns count of blocks with no good replicas left. Normally should be 
2205   * zero.
2206   * @throws IOException
2207   */ 
2208  public long getMissingBlocksCount() throws IOException {
2209    return namenode.getStats()[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX];
2210  }
2211  
2212  /**
2213   * Returns count of blocks with one or more replicas missing.
2214   * @throws IOException
2215   */ 
2216  public long getUnderReplicatedBlocksCount() throws IOException {
2217    return namenode.getStats()[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX];
2218  }
2219  
2220  /**
2221   * Returns count of blocks with at least one replica marked corrupt. 
2222   * @throws IOException
2223   */ 
2224  public long getCorruptBlocksCount() throws IOException {
2225    return namenode.getStats()[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX];
2226  }
2227  
2228  /**
2229   * @return a list in which each entry describes a corrupt file/block
2230   * @throws IOException
2231   */
2232  public CorruptFileBlocks listCorruptFileBlocks(String path,
2233                                                 String cookie)
2234    throws IOException {
2235    return namenode.listCorruptFileBlocks(path, cookie);
2236  }
2237
2238  public DatanodeInfo[] datanodeReport(DatanodeReportType type)
2239  throws IOException {
2240    return namenode.getDatanodeReport(type);
2241  }
2242    
2243  /**
2244   * Enter, leave or get safe mode.
2245   * 
2246   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction,boolean)
2247   */
2248  public boolean setSafeMode(SafeModeAction action) throws IOException {
2249    return setSafeMode(action, false);
2250  }
2251  
2252  /**
2253   * Enter, leave or get safe mode.
2254   * 
2255   * @param action
2256   *          One of SafeModeAction.SAFEMODE_GET, SafeModeAction.SAFEMODE_ENTER
2257   *          and SafeModeAction.SAFEMODE_LEAVE
2258   * @param isChecked
2259   *          If true, then check only active namenode's safemode status, else
2260   *          check first namenode's status.
2261   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
2262   */
2263  public boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException{
2264    return namenode.setSafeMode(action, isChecked);    
2265  }
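
  // Illustrative usage sketch (assumes the SAFEMODE_* constants of
  // HdfsConstants.SafeModeAction): query safe mode and, if the namenode is in
  // it, request that it leave.
  //
  //   DFSClient client = ...;
  //   if (client.setSafeMode(SafeModeAction.SAFEMODE_GET)) {
  //     client.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  //   }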
2266 
2267  /**
2268   * Create one snapshot.
2269   * 
2270   * @param snapshotRoot The directory where the snapshot is to be taken
2271   * @param snapshotName Name of the snapshot
2272   * @return the snapshot path.
2273   * @see ClientProtocol#createSnapshot(String, String)
2274   */
2275  public String createSnapshot(String snapshotRoot, String snapshotName)
2276      throws IOException {
2277    checkOpen();
2278    try {
2279      return namenode.createSnapshot(snapshotRoot, snapshotName);
2280    } catch(RemoteException re) {
2281      throw re.unwrapRemoteException();
2282    }
2283  }
2284  
2285  /**
2286   * Delete a snapshot of a snapshottable directory.
2287   * 
2288   * @param snapshotRoot The snapshottable directory that the 
2289   *                    to-be-deleted snapshot belongs to
2290   * @param snapshotName The name of the to-be-deleted snapshot
2291   * @throws IOException
2292   * @see ClientProtocol#deleteSnapshot(String, String)
2293   */
2294  public void deleteSnapshot(String snapshotRoot, String snapshotName)
2295      throws IOException {
2296    try {
2297      namenode.deleteSnapshot(snapshotRoot, snapshotName);
2298    } catch(RemoteException re) {
2299      throw re.unwrapRemoteException();
2300    }
2301  }
2302  
2303  /**
2304   * Rename a snapshot.
2305   * @param snapshotDir The directory path where the snapshot was taken
2306   * @param snapshotOldName Old name of the snapshot
2307   * @param snapshotNewName New name of the snapshot
2308   * @throws IOException
2309   * @see ClientProtocol#renameSnapshot(String, String, String)
2310   */
2311  public void renameSnapshot(String snapshotDir, String snapshotOldName,
2312      String snapshotNewName) throws IOException {
2313    checkOpen();
2314    try {
2315      namenode.renameSnapshot(snapshotDir, snapshotOldName, snapshotNewName);
2316    } catch(RemoteException re) {
2317      throw re.unwrapRemoteException();
2318    }
2319  }
2320  
2321  /**
2322   * Get all the current snapshottable directories.
2323   * @return All the current snapshottable directories
2324   * @throws IOException
2325   * @see ClientProtocol#getSnapshottableDirListing()
2326   */
2327  public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
2328      throws IOException {
2329    checkOpen();
2330    try {
2331      return namenode.getSnapshottableDirListing();
2332    } catch(RemoteException re) {
2333      throw re.unwrapRemoteException();
2334    }
2335  }
2336
2337  /**
2338   * Allow snapshot on a directory.
2339   * 
2340   * @see ClientProtocol#allowSnapshot(String snapshotRoot)
2341   */
2342  public void allowSnapshot(String snapshotRoot) throws IOException {
2343    checkOpen();
2344    try {
2345      namenode.allowSnapshot(snapshotRoot);
2346    } catch (RemoteException re) {
2347      throw re.unwrapRemoteException();
2348    }
2349  }
2350  
2351  /**
2352   * Disallow snapshot on a directory.
2353   * 
2354   * @see ClientProtocol#disallowSnapshot(String snapshotRoot)
2355   */
2356  public void disallowSnapshot(String snapshotRoot) throws IOException {
2357    checkOpen();
2358    try {
2359      namenode.disallowSnapshot(snapshotRoot);
2360    } catch (RemoteException re) {
2361      throw re.unwrapRemoteException();
2362    }
2363  }
2364  
2365  /**
2366   * Get the difference between two snapshots, or between a snapshot and the
2367   * current tree of a directory.
2368   * @see ClientProtocol#getSnapshotDiffReport(String, String, String)
2369   */
2370  public SnapshotDiffReport getSnapshotDiffReport(String snapshotDir,
2371      String fromSnapshot, String toSnapshot) throws IOException {
2372    checkOpen();
2373    try {
2374      return namenode.getSnapshotDiffReport(snapshotDir,
2375          fromSnapshot, toSnapshot);
2376    } catch(RemoteException re) {
2377      throw re.unwrapRemoteException();
2378    }
2379  }
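
  // Illustrative snapshot workflow sketch (directory and snapshot names are
  // placeholders; the directory must be made snapshottable before snapshots
  // can be taken).
  //
  //   DFSClient client = ...;
  //   client.allowSnapshot("/user/example/dir");
  //   client.createSnapshot("/user/example/dir", "s1");
  //   // ... files under /user/example/dir are modified ...
  //   client.createSnapshot("/user/example/dir", "s2");
  //   SnapshotDiffReport diff =
  //       client.getSnapshotDiffReport("/user/example/dir", "s1", "s2");
  //   System.out.println(diff);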
2380
2381  public long addCacheDirective(
2382      CacheDirectiveInfo info, EnumSet<CacheFlag> flags) throws IOException {
2383    checkOpen();
2384    try {
2385      return namenode.addCacheDirective(info, flags);
2386    } catch (RemoteException re) {
2387      throw re.unwrapRemoteException();
2388    }
2389  }
2390  
2391  public void modifyCacheDirective(
2392      CacheDirectiveInfo info, EnumSet<CacheFlag> flags) throws IOException {
2393    checkOpen();
2394    try {
2395      namenode.modifyCacheDirective(info, flags);
2396    } catch (RemoteException re) {
2397      throw re.unwrapRemoteException();
2398    }
2399  }
2400
2401  public void removeCacheDirective(long id)
2402      throws IOException {
2403    checkOpen();
2404    try {
2405      namenode.removeCacheDirective(id);
2406    } catch (RemoteException re) {
2407      throw re.unwrapRemoteException();
2408    }
2409  }
2410  
2411  public RemoteIterator<CacheDirectiveEntry> listCacheDirectives(
2412      CacheDirectiveInfo filter) throws IOException {
2413    return new CacheDirectiveIterator(namenode, filter);
2414  }
2415
2416  public void addCachePool(CachePoolInfo info) throws IOException {
2417    checkOpen();
2418    try {
2419      namenode.addCachePool(info);
2420    } catch (RemoteException re) {
2421      throw re.unwrapRemoteException();
2422    }
2423  }
2424
2425  public void modifyCachePool(CachePoolInfo info) throws IOException {
2426    checkOpen();
2427    try {
2428      namenode.modifyCachePool(info);
2429    } catch (RemoteException re) {
2430      throw re.unwrapRemoteException();
2431    }
2432  }
2433
2434  public void removeCachePool(String poolName) throws IOException {
2435    checkOpen();
2436    try {
2437      namenode.removeCachePool(poolName);
2438    } catch (RemoteException re) {
2439      throw re.unwrapRemoteException();
2440    }
2441  }
2442
2443  public RemoteIterator<CachePoolEntry> listCachePools() throws IOException {
2444    return new CachePoolIterator(namenode);
2445  }
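
  // Illustrative caching sketch (pool and path names are placeholders; the
  // CachePoolInfo constructor and CacheDirectiveInfo.Builder calls are
  // assumptions based on the public caching API).
  //
  //   DFSClient client = ...;
  //   client.addCachePool(new CachePoolInfo("examplePool"));
  //   long id = client.addCacheDirective(
  //       new CacheDirectiveInfo.Builder()
  //           .setPath(new Path("/user/example/hot-data"))
  //           .setPool("examplePool")
  //           .build(),
  //       EnumSet.noneOf(CacheFlag.class));
  //   RemoteIterator<CacheDirectiveEntry> it = client.listCacheDirectives(
  //       new CacheDirectiveInfo.Builder().setPool("examplePool").build());
  //   while (it.hasNext()) {
  //     System.out.println(it.next().getInfo());
  //   }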
2446
2447  /**
2448   * Save namespace image.
2449   * 
2450   * @see ClientProtocol#saveNamespace()
2451   */
2452  void saveNamespace() throws AccessControlException, IOException {
2453    try {
2454      namenode.saveNamespace();
2455    } catch(RemoteException re) {
2456      throw re.unwrapRemoteException(AccessControlException.class);
2457    }
2458  }
2459
2460  /**
2461   * Rolls the edit log on the active NameNode.
2462   * @return the txid of the new log segment 
2463   *
2464   * @see ClientProtocol#rollEdits()
2465   */
2466  long rollEdits() throws AccessControlException, IOException {
2467    try {
2468      return namenode.rollEdits();
2469    } catch(RemoteException re) {
2470      throw re.unwrapRemoteException(AccessControlException.class);
2471    }
2472  }
2473
2474  @VisibleForTesting
2475  ExtendedBlock getPreviousBlock(long fileId) {
2476    return filesBeingWritten.get(fileId).getBlock();
2477  }
2478  
2479  /**
2480   * Enable/disable restore of failed storage.
2481   * 
2482   * @see ClientProtocol#restoreFailedStorage(String arg)
2483   */
2484  boolean restoreFailedStorage(String arg)
2485      throws AccessControlException, IOException{
2486    return namenode.restoreFailedStorage(arg);
2487  }
2488
2489  /**
2490   * Refresh the hosts and exclude files.  (Rereads them.)
2491   * See {@link ClientProtocol#refreshNodes()} 
2492   * for more details.
2493   * 
2494   * @see ClientProtocol#refreshNodes()
2495   */
2496  public void refreshNodes() throws IOException {
2497    namenode.refreshNodes();
2498  }
2499
2500  /**
2501   * Dumps DFS data structures into specified file.
2502   * 
2503   * @see ClientProtocol#metaSave(String)
2504   */
2505  public void metaSave(String pathname) throws IOException {
2506    namenode.metaSave(pathname);
2507  }
2508
2509  /**
2510   * Requests the namenode to tell all datanodes to use a new, non-persistent
2511   * bandwidth value for dfs.balance.bandwidthPerSec.
2512   * See {@link ClientProtocol#setBalancerBandwidth(long)} 
2513   * for more details.
2514   * 
2515   * @see ClientProtocol#setBalancerBandwidth(long)
2516   */
2517  public void setBalancerBandwidth(long bandwidth) throws IOException {
2518    namenode.setBalancerBandwidth(bandwidth);
2519  }
2520    
2521  /**
2522   * @see ClientProtocol#finalizeUpgrade()
2523   */
2524  public void finalizeUpgrade() throws IOException {
2525    namenode.finalizeUpgrade();
2526  }
2527
2528  RollingUpgradeInfo rollingUpgrade(RollingUpgradeAction action) throws IOException {
2529    return namenode.rollingUpgrade(action);
2530  }
2531
2532  /** Create a directory with default permission and createParent set to true.
2533   */
2534  @Deprecated
2535  public boolean mkdirs(String src) throws IOException {
2536    return mkdirs(src, null, true);
2537  }
2538
2539  /**
2540   * Create a directory (or hierarchy of directories) with the given
2541   * name and permission.
2542   *
2543   * @param src The path of the directory being created
2544   * @param permission The permission of the directory being created.
2545   * If permission == null, use {@link FsPermission#getDefault()}.
2546   * @param createParent create missing parent directory if true
2547   * 
2548   * @return True if the operation succeeds.
2549   * 
2550   * @see ClientProtocol#mkdirs(String, FsPermission, boolean)
2551   */
2552  public boolean mkdirs(String src, FsPermission permission,
2553      boolean createParent) throws IOException {
2554    if (permission == null) {
2555      permission = FsPermission.getDefault();
2556    }
2557    FsPermission masked = permission.applyUMask(dfsClientConf.uMask);
2558    return primitiveMkdir(src, masked, createParent);
2559  }
2560
2561  /**
2562   * Same as {@link #mkdirs(String, FsPermission, boolean)} except
2563   * that the permission has already been masked against the umask.
2564   */
2565  public boolean primitiveMkdir(String src, FsPermission absPermission)
2566    throws IOException {
2567    return primitiveMkdir(src, absPermission, true);
2568  }
2569
2570  /**
2571   * Same as {@link #mkdirs(String, FsPermission, boolean)} except
2572   * that the permission has already been masked against the umask.
2573   */
2574  public boolean primitiveMkdir(String src, FsPermission absPermission, 
2575    boolean createParent)
2576    throws IOException {
2577    checkOpen();
2578    if (absPermission == null) {
2579      absPermission = 
2580        FsPermission.getDefault().applyUMask(dfsClientConf.uMask);
2581    } 
2582
2583    if(LOG.isDebugEnabled()) {
2584      LOG.debug(src + ": masked=" + absPermission);
2585    }
2586    try {
2587      return namenode.mkdirs(src, absPermission, createParent);
2588    } catch(RemoteException re) {
2589      throw re.unwrapRemoteException(AccessControlException.class,
2590                                     InvalidPathException.class,
2591                                     FileAlreadyExistsException.class,
2592                                     FileNotFoundException.class,
2593                                     ParentNotDirectoryException.class,
2594                                     SafeModeException.class,
2595                                     NSQuotaExceededException.class,
2596                                     DSQuotaExceededException.class,
2597                                     UnresolvedPathException.class,
2598                                     SnapshotAccessControlException.class);
2599    }
2600  }
2601  
2602  /**
2603   * Get {@link ContentSummary} rooted at the specified directory.
2604   * @param src The string representation of the path
2605   * 
2606   * @see ClientProtocol#getContentSummary(String)
2607   */
2608  ContentSummary getContentSummary(String src) throws IOException {
2609    try {
2610      return namenode.getContentSummary(src);
2611    } catch(RemoteException re) {
2612      throw re.unwrapRemoteException(AccessControlException.class,
2613                                     FileNotFoundException.class,
2614                                     UnresolvedPathException.class);
2615    }
2616  }
2617
2618  /**
2619   * Sets or resets quotas for a directory.
2620   * @see ClientProtocol#setQuota(String, long, long)
2621   */
2622  void setQuota(String src, long namespaceQuota, long diskspaceQuota) 
2623      throws IOException {
2624    // sanity check
2625    if ((namespaceQuota <= 0 && namespaceQuota != HdfsConstants.QUOTA_DONT_SET &&
2626         namespaceQuota != HdfsConstants.QUOTA_RESET) ||
2627        (diskspaceQuota <= 0 && diskspaceQuota != HdfsConstants.QUOTA_DONT_SET &&
2628         diskspaceQuota != HdfsConstants.QUOTA_RESET)) {
2629      throw new IllegalArgumentException("Invalid values for quota : " +
2630                                         namespaceQuota + " and " + 
2631                                         diskspaceQuota);
2632                                         
2633    }
2634    try {
2635      namenode.setQuota(src, namespaceQuota, diskspaceQuota);
2636    } catch(RemoteException re) {
2637      throw re.unwrapRemoteException(AccessControlException.class,
2638                                     FileNotFoundException.class,
2639                                     NSQuotaExceededException.class,
2640                                     DSQuotaExceededException.class,
2641                                     UnresolvedPathException.class,
2642                                     SnapshotAccessControlException.class);
2643    }
2644  }
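
  // Illustrative quota sketch (path and limit are example values): set a
  // namespace quota while leaving the diskspace quota untouched, then clear
  // it again with QUOTA_RESET.
  //
  //   DFSClient client = ...;
  //   client.setQuota("/user/example", 10000, HdfsConstants.QUOTA_DONT_SET);
  //   // ... later ...
  //   client.setQuota("/user/example", HdfsConstants.QUOTA_RESET,
  //       HdfsConstants.QUOTA_DONT_SET);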
2645
2646  /**
2647   * Set the modification and access time of a file.
2648   * 
2649   * @see ClientProtocol#setTimes(String, long, long)
2650   */
2651  public void setTimes(String src, long mtime, long atime) throws IOException {
2652    checkOpen();
2653    try {
2654      namenode.setTimes(src, mtime, atime);
2655    } catch(RemoteException re) {
2656      throw re.unwrapRemoteException(AccessControlException.class,
2657                                     FileNotFoundException.class,
2658                                     UnresolvedPathException.class,
2659                                     SnapshotAccessControlException.class);
2660    }
2661  }
2662
2663  /**
2664   * @deprecated use {@link HdfsDataInputStream} instead.
2665   */
2666  @Deprecated
2667  public static class DFSDataInputStream extends HdfsDataInputStream {
2668
2669    public DFSDataInputStream(DFSInputStream in) throws IOException {
2670      super(in);
2671    }
2672  }
2673
2674  void reportChecksumFailure(String file, ExtendedBlock blk, DatanodeInfo dn) {
2675    DatanodeInfo [] dnArr = { dn };
2676    LocatedBlock [] lblocks = { new LocatedBlock(blk, dnArr) };
2677    reportChecksumFailure(file, lblocks);
2678  }
2679    
2680  // just reports checksum failure and ignores any exception during the report.
2681  void reportChecksumFailure(String file, LocatedBlock lblocks[]) {
2682    try {
2683      reportBadBlocks(lblocks);
2684    } catch (IOException ie) {
2685      LOG.info("Found corruption while reading " + file
2686          + ". Error repairing corrupt blocks. Bad blocks remain.", ie);
2687    }
2688  }
2689
2690  @Override
2691  public String toString() {
2692    return getClass().getSimpleName() + "[clientName=" + clientName
2693        + ", ugi=" + ugi + "]"; 
2694  }
2695
2696  public CachingStrategy getDefaultReadCachingStrategy() {
2697    return defaultReadCachingStrategy;
2698  }
2699
2700  public CachingStrategy getDefaultWriteCachingStrategy() {
2701    return defaultWriteCachingStrategy;
2702  }
2703
2704  public ClientContext getClientContext() {
2705    return clientContext;
2706  }
2707
  public void modifyAclEntries(String src, List<AclEntry> aclSpec)
      throws IOException {
    checkOpen();
    try {
      namenode.modifyAclEntries(src, aclSpec);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

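  /**
   * Remove the given ACL entries from a file or directory.
   *
   * @see ClientProtocol#removeAclEntries(String, List)
   */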
  public void removeAclEntries(String src, List<AclEntry> aclSpec)
      throws IOException {
    checkOpen();
    try {
      namenode.removeAclEntries(src, aclSpec);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

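  /**
   * Remove the default ACL entries from a path.
   *
   * @see ClientProtocol#removeDefaultAcl(String)
   */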
  public void removeDefaultAcl(String src) throws IOException {
    checkOpen();
    try {
      namenode.removeDefaultAcl(src);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

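  /**
   * Remove all but the base ACL entries of a file or directory.
   *
   * @see ClientProtocol#removeAcl(String)
   */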
  public void removeAcl(String src) throws IOException {
    checkOpen();
    try {
      namenode.removeAcl(src);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

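  /**
   * Fully replace the ACL of a file or directory with the given entries.
   *
   * @see ClientProtocol#setAcl(String, List)
   */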
  public void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
    checkOpen();
    try {
      namenode.setAcl(src, aclSpec);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

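  /**
   * Get the ACL status of a file or directory.
   *
   * @see ClientProtocol#getAclStatus(String)
   */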
  public AclStatus getAclStatus(String src) throws IOException {
    checkOpen();
    try {
      return namenode.getAclStatus(src);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }

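  /**
   * Set an extended attribute on a file or directory. The attribute name
   * must include its namespace prefix, e.g. "user.key".
   *
   * @see ClientProtocol#setXAttr(String, XAttr, EnumSet)
   */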
  public void setXAttr(String src, String name, byte[] value,
      EnumSet<XAttrSetFlag> flag) throws IOException {
    checkOpen();
    try {
      namenode.setXAttr(src, XAttrHelper.buildXAttr(name, value), flag);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

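  /**
   * Get the value of one extended attribute of a file or directory.
   *
   * @see ClientProtocol#getXAttrs(String, List)
   */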
  public byte[] getXAttr(String src, String name) throws IOException {
    checkOpen();
    try {
      final List<XAttr> xAttrs = XAttrHelper.buildXAttrAsList(name);
      final List<XAttr> result = namenode.getXAttrs(src, xAttrs);
      return XAttrHelper.getFirstXAttrValue(result);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }

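  /**
   * Get all of the extended attributes of a file or directory, as a map
   * from xattr name to value.
   *
   * @see ClientProtocol#getXAttrs(String, List)
   */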
  public Map<String, byte[]> getXAttrs(String src) throws IOException {
    checkOpen();
    try {
      return XAttrHelper.buildXAttrMap(namenode.getXAttrs(src, null));
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }

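  /**
   * Get the values of the named extended attributes of a file or directory,
   * as a map from xattr name to value.
   *
   * @see ClientProtocol#getXAttrs(String, List)
   */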
  public Map<String, byte[]> getXAttrs(String src, List<String> names)
      throws IOException {
    checkOpen();
    try {
      return XAttrHelper.buildXAttrMap(namenode.getXAttrs(
          src, XAttrHelper.buildXAttrs(names)));
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }

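  /**
   * List the names of the extended attributes of a file or directory.
   *
   * @see ClientProtocol#listXAttrs(String)
   */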
  public List<String> listXAttrs(String src)
      throws IOException {
    checkOpen();
    try {
      final Map<String, byte[]> xattrs =
        XAttrHelper.buildXAttrMap(namenode.listXAttrs(src));
      return Lists.newArrayList(xattrs.keySet());
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }

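  /**
   * Remove an extended attribute from a file or directory.
   *
   * @see ClientProtocol#removeXAttr(String, XAttr)
   */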
  public void removeXAttr(String src, String name) throws IOException {
    checkOpen();
    try {
      namenode.removeXAttr(src, XAttrHelper.buildXAttr(name));
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

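  /**
   * Create a new {@link Peer} connected to the given datanode address,
   * using this client's socket factory, socket timeout, and data
   * encryption key. The socket is cleaned up if the connection cannot
   * be established.
   */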
  @Override // RemotePeerFactory
  public Peer newConnectedPeer(InetSocketAddress addr) throws IOException {
    Peer peer = null;
    boolean success = false;
    Socket sock = null;
    try {
      sock = socketFactory.createSocket();
      NetUtils.connect(sock, addr,
        getRandomLocalInterfaceAddr(),
        dfsClientConf.socketTimeout);
      peer = TcpPeerServer.peerFromSocketAndKey(sock,
          getDataEncryptionKey());
      success = true;
      return peer;
    } finally {
      if (!success) {
        IOUtils.cleanup(LOG, peer);
        IOUtils.closeSocket(sock);
      }
    }
  }

  /**
   * Create the hedged reads thread pool, HEDGED_READ_THREAD_POOL, if
   * it does not already exist.
   * @param num Number of threads for the hedged reads thread pool.
   * If zero, skip hedged reads thread pool creation.
   */
  private synchronized void initThreadsNumForHedgedReads(int num) {
    if (num <= 0 || HEDGED_READ_THREAD_POOL != null) return;
    HEDGED_READ_THREAD_POOL = new ThreadPoolExecutor(1, num, 60,
        TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
        new Daemon.DaemonFactory() {
          private final AtomicInteger threadIndex =
            new AtomicInteger(0);
          @Override
          public Thread newThread(Runnable r) {
            Thread t = super.newThread(r);
            t.setName("hedgedRead-" +
              threadIndex.getAndIncrement());
            return t;
          }
        },
        new ThreadPoolExecutor.CallerRunsPolicy() {

      @Override
      public void rejectedExecution(Runnable runnable,
          ThreadPoolExecutor e) {
        LOG.info("Execution rejected, executing in current thread");
        HEDGED_READ_METRIC.incHedgedReadOpsInCurThread();
        // will run in the current thread
        super.rejectedExecution(runnable, e);
      }
    });
    HEDGED_READ_THREAD_POOL.allowCoreThreadTimeOut(true);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Using hedged reads; pool threads=" + num);
    }
  }

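  /**
   * @return the threshold in milliseconds to wait on the first read
   *         before a hedged read is started.
   */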
  long getHedgedReadTimeout() {
    return this.hedgedReadThresholdMillis;
  }

  @VisibleForTesting
  void setHedgedReadTimeout(long timeoutMillis) {
    this.hedgedReadThresholdMillis = timeoutMillis;
  }

  ThreadPoolExecutor getHedgedReadsThreadPool() {
    return HEDGED_READ_THREAD_POOL;
  }

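  /**
   * @return true if hedged reads are enabled for this client.
   */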
  boolean isHedgedReadsEnabled() {
    return (HEDGED_READ_THREAD_POOL != null) &&
      HEDGED_READ_THREAD_POOL.getMaximumPoolSize() > 0;
  }

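  /**
   * @return the metrics collected for hedged read operations.
   */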
  DFSHedgedReadMetrics getHedgedReadMetrics() {
    return HEDGED_READ_METRIC;
  }
}