001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs; 019 020import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ShortCircuitFdResponse.USE_RECEIPT_VERIFICATION; 021 022import java.io.BufferedOutputStream; 023import java.io.DataInputStream; 024import java.io.DataOutputStream; 025import java.io.FileInputStream; 026import java.io.IOException; 027import java.net.InetSocketAddress; 028 029import org.apache.commons.lang.mutable.MutableBoolean; 030import org.apache.commons.logging.Log; 031import org.apache.commons.logging.LogFactory; 032import org.apache.hadoop.classification.InterfaceAudience; 033import org.apache.hadoop.conf.Configuration; 034import org.apache.hadoop.hdfs.net.DomainPeer; 035import org.apache.hadoop.hdfs.net.Peer; 036import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 037import org.apache.hadoop.hdfs.protocol.ExtendedBlock; 038import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException; 039import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; 040import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto; 041import org.apache.hadoop.hdfs.protocolPB.PBHelper; 042import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; 043import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; 044import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; 045import org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory; 046import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache; 047import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.ShortCircuitReplicaCreator; 048import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica; 049import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplicaInfo; 050import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot; 051import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId; 052import org.apache.hadoop.io.IOUtils; 053import org.apache.hadoop.ipc.RemoteException; 054import org.apache.hadoop.net.unix.DomainSocket; 055import org.apache.hadoop.security.AccessControlException; 056import org.apache.hadoop.security.UserGroupInformation; 057import org.apache.hadoop.security.token.SecretManager.InvalidToken; 058import org.apache.hadoop.security.token.Token; 059import org.apache.hadoop.util.PerformanceAdvisory; 060import org.apache.hadoop.util.Time; 061 062import com.google.common.annotations.VisibleForTesting; 063import com.google.common.base.Preconditions; 064 065 066/** 067 * Utility class to create BlockReader implementations. 068 */ 069@InterfaceAudience.Private 070public class BlockReaderFactory implements ShortCircuitReplicaCreator { 071 static final Log LOG = LogFactory.getLog(BlockReaderFactory.class); 072 073 public static class FailureInjector { 074 public void injectRequestFileDescriptorsFailure() throws IOException { 075 // do nothing 076 } 077 public boolean getSupportsReceiptVerification() { 078 return true; 079 } 080 } 081 082 @VisibleForTesting 083 static ShortCircuitReplicaCreator 084 createShortCircuitReplicaInfoCallback = null; 085 086 private final DFSClient.Conf conf; 087 088 /** 089 * Injects failures into specific operations during unit tests. 090 */ 091 private final FailureInjector failureInjector; 092 093 /** 094 * The file name, for logging and debugging purposes. 095 */ 096 private String fileName; 097 098 /** 099 * The block ID and block pool ID to use. 100 */ 101 private ExtendedBlock block; 102 103 /** 104 * The block token to use for security purposes. 105 */ 106 private Token<BlockTokenIdentifier> token; 107 108 /** 109 * The offset within the block to start reading at. 110 */ 111 private long startOffset; 112 113 /** 114 * If false, we won't try to verify the block checksum. 115 */ 116 private boolean verifyChecksum; 117 118 /** 119 * The name of this client. 120 */ 121 private String clientName; 122 123 /** 124 * The DataNode we're talking to. 125 */ 126 private DatanodeInfo datanode; 127 128 /** 129 * StorageType of replica on DataNode. 130 */ 131 private StorageType storageType; 132 133 /** 134 * If false, we won't try short-circuit local reads. 135 */ 136 private boolean allowShortCircuitLocalReads; 137 138 /** 139 * The ClientContext to use for things like the PeerCache. 140 */ 141 private ClientContext clientContext; 142 143 /** 144 * Number of bytes to read. -1 indicates no limit. 145 */ 146 private long length = -1; 147 148 /** 149 * Caching strategy to use when reading the block. 150 */ 151 private CachingStrategy cachingStrategy; 152 153 /** 154 * Socket address to use to connect to peer. 155 */ 156 private InetSocketAddress inetSocketAddress; 157 158 /** 159 * Remote peer factory to use to create a peer, if needed. 160 */ 161 private RemotePeerFactory remotePeerFactory; 162 163 /** 164 * UserGroupInformation to use for legacy block reader local objects, if needed. 165 */ 166 private UserGroupInformation userGroupInformation; 167 168 /** 169 * Configuration to use for legacy block reader local objects, if needed. 170 */ 171 private Configuration configuration; 172 173 /** 174 * Information about the domain socket path we should use to connect to the 175 * local peer-- or null if we haven't examined the local domain socket. 176 */ 177 private DomainSocketFactory.PathInfo pathInfo; 178 179 /** 180 * The remaining number of times that we'll try to pull a socket out of the 181 * cache. 182 */ 183 private int remainingCacheTries; 184 185 public BlockReaderFactory(DFSClient.Conf conf) { 186 this.conf = conf; 187 this.failureInjector = conf.brfFailureInjector; 188 this.remainingCacheTries = conf.nCachedConnRetry; 189 } 190 191 public BlockReaderFactory setFileName(String fileName) { 192 this.fileName = fileName; 193 return this; 194 } 195 196 public BlockReaderFactory setBlock(ExtendedBlock block) { 197 this.block = block; 198 return this; 199 } 200 201 public BlockReaderFactory setBlockToken(Token<BlockTokenIdentifier> token) { 202 this.token = token; 203 return this; 204 } 205 206 public BlockReaderFactory setStartOffset(long startOffset) { 207 this.startOffset = startOffset; 208 return this; 209 } 210 211 public BlockReaderFactory setVerifyChecksum(boolean verifyChecksum) { 212 this.verifyChecksum = verifyChecksum; 213 return this; 214 } 215 216 public BlockReaderFactory setClientName(String clientName) { 217 this.clientName = clientName; 218 return this; 219 } 220 221 public BlockReaderFactory setDatanodeInfo(DatanodeInfo datanode) { 222 this.datanode = datanode; 223 return this; 224 } 225 226 public BlockReaderFactory setStorageType(StorageType storageType) { 227 this.storageType = storageType; 228 return this; 229 } 230 231 public BlockReaderFactory setAllowShortCircuitLocalReads( 232 boolean allowShortCircuitLocalReads) { 233 this.allowShortCircuitLocalReads = allowShortCircuitLocalReads; 234 return this; 235 } 236 237 public BlockReaderFactory setClientCacheContext( 238 ClientContext clientContext) { 239 this.clientContext = clientContext; 240 return this; 241 } 242 243 public BlockReaderFactory setLength(long length) { 244 this.length = length; 245 return this; 246 } 247 248 public BlockReaderFactory setCachingStrategy( 249 CachingStrategy cachingStrategy) { 250 this.cachingStrategy = cachingStrategy; 251 return this; 252 } 253 254 public BlockReaderFactory setInetSocketAddress ( 255 InetSocketAddress inetSocketAddress) { 256 this.inetSocketAddress = inetSocketAddress; 257 return this; 258 } 259 260 public BlockReaderFactory setUserGroupInformation( 261 UserGroupInformation userGroupInformation) { 262 this.userGroupInformation = userGroupInformation; 263 return this; 264 } 265 266 public BlockReaderFactory setRemotePeerFactory( 267 RemotePeerFactory remotePeerFactory) { 268 this.remotePeerFactory = remotePeerFactory; 269 return this; 270 } 271 272 public BlockReaderFactory setConfiguration( 273 Configuration configuration) { 274 this.configuration = configuration; 275 return this; 276 } 277 278 /** 279 * Build a BlockReader with the given options. 280 * 281 * This function will do the best it can to create a block reader that meets 282 * all of our requirements. We prefer short-circuit block readers 283 * (BlockReaderLocal and BlockReaderLocalLegacy) over remote ones, since the 284 * former avoid the overhead of socket communication. If short-circuit is 285 * unavailable, our next fallback is data transfer over UNIX domain sockets, 286 * if dfs.client.domain.socket.data.traffic has been enabled. If that doesn't 287 * work, we will try to create a remote block reader that operates over TCP 288 * sockets. 289 * 290 * There are a few caches that are important here. 291 * 292 * The ShortCircuitCache stores file descriptor objects which have been passed 293 * from the DataNode. 294 * 295 * The DomainSocketFactory stores information about UNIX domain socket paths 296 * that we not been able to use in the past, so that we don't waste time 297 * retrying them over and over. (Like all the caches, it does have a timeout, 298 * though.) 299 * 300 * The PeerCache stores peers that we have used in the past. If we can reuse 301 * one of these peers, we avoid the overhead of re-opening a socket. However, 302 * if the socket has been timed out on the remote end, our attempt to reuse 303 * the socket may end with an IOException. For that reason, we limit our 304 * attempts at socket reuse to dfs.client.cached.conn.retry times. After 305 * that, we create new sockets. This avoids the problem where a thread tries 306 * to talk to a peer that it hasn't talked to in a while, and has to clean out 307 * every entry in a socket cache full of stale entries. 308 * 309 * @return The new BlockReader. We will not return null. 310 * 311 * @throws InvalidToken 312 * If the block token was invalid. 313 * InvalidEncryptionKeyException 314 * If the encryption key was invalid. 315 * Other IOException 316 * If there was another problem. 317 */ 318 public BlockReader build() throws IOException { 319 BlockReader reader = null; 320 321 Preconditions.checkNotNull(configuration); 322 if (conf.shortCircuitLocalReads && allowShortCircuitLocalReads) { 323 if (clientContext.getUseLegacyBlockReaderLocal()) { 324 reader = getLegacyBlockReaderLocal(); 325 if (reader != null) { 326 if (LOG.isTraceEnabled()) { 327 LOG.trace(this + ": returning new legacy block reader local."); 328 } 329 return reader; 330 } 331 } else { 332 reader = getBlockReaderLocal(); 333 if (reader != null) { 334 if (LOG.isTraceEnabled()) { 335 LOG.trace(this + ": returning new block reader local."); 336 } 337 return reader; 338 } 339 } 340 } 341 if (conf.domainSocketDataTraffic) { 342 reader = getRemoteBlockReaderFromDomain(); 343 if (reader != null) { 344 if (LOG.isTraceEnabled()) { 345 LOG.trace(this + ": returning new remote block reader using " + 346 "UNIX domain socket on " + pathInfo.getPath()); 347 } 348 return reader; 349 } 350 } 351 Preconditions.checkState(!DFSInputStream.tcpReadsDisabledForTesting, 352 "TCP reads were disabled for testing, but we failed to " + 353 "do a non-TCP read."); 354 return getRemoteBlockReaderFromTcp(); 355 } 356 357 /** 358 * Get {@link BlockReaderLocalLegacy} for short circuited local reads. 359 * This block reader implements the path-based style of local reads 360 * first introduced in HDFS-2246. 361 */ 362 private BlockReader getLegacyBlockReaderLocal() throws IOException { 363 if (LOG.isTraceEnabled()) { 364 LOG.trace(this + ": trying to construct BlockReaderLocalLegacy"); 365 } 366 if (!DFSClient.isLocalAddress(inetSocketAddress)) { 367 if (LOG.isTraceEnabled()) { 368 LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " + 369 "the address " + inetSocketAddress + " is not local"); 370 } 371 return null; 372 } 373 if (clientContext.getDisableLegacyBlockReaderLocal()) { 374 PerformanceAdvisory.LOG.debug(this + ": can't construct " + 375 "BlockReaderLocalLegacy because " + 376 "disableLegacyBlockReaderLocal is set."); 377 return null; 378 } 379 IOException ioe = null; 380 try { 381 return BlockReaderLocalLegacy.newBlockReader(conf, 382 userGroupInformation, configuration, fileName, block, token, 383 datanode, startOffset, length, storageType); 384 } catch (RemoteException remoteException) { 385 ioe = remoteException.unwrapRemoteException( 386 InvalidToken.class, AccessControlException.class); 387 } catch (IOException e) { 388 ioe = e; 389 } 390 if ((!(ioe instanceof AccessControlException)) && 391 isSecurityException(ioe)) { 392 // Handle security exceptions. 393 // We do not handle AccessControlException here, since 394 // BlockReaderLocalLegacy#newBlockReader uses that exception to indicate 395 // that the user is not in dfs.block.local-path-access.user, a condition 396 // which requires us to disable legacy SCR. 397 throw ioe; 398 } 399 LOG.warn(this + ": error creating legacy BlockReaderLocal. " + 400 "Disabling legacy local reads.", ioe); 401 clientContext.setDisableLegacyBlockReaderLocal(); 402 return null; 403 } 404 405 private BlockReader getBlockReaderLocal() throws InvalidToken { 406 if (LOG.isTraceEnabled()) { 407 LOG.trace(this + ": trying to construct a BlockReaderLocal " + 408 "for short-circuit reads."); 409 } 410 if (pathInfo == null) { 411 pathInfo = clientContext.getDomainSocketFactory(). 412 getPathInfo(inetSocketAddress, conf); 413 } 414 if (!pathInfo.getPathState().getUsableForShortCircuit()) { 415 PerformanceAdvisory.LOG.debug(this + ": " + pathInfo + " is not " + 416 "usable for short circuit; giving up on BlockReaderLocal."); 417 return null; 418 } 419 ShortCircuitCache cache = clientContext.getShortCircuitCache(); 420 ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()); 421 ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this); 422 InvalidToken exc = info.getInvalidTokenException(); 423 if (exc != null) { 424 if (LOG.isTraceEnabled()) { 425 LOG.trace(this + ": got InvalidToken exception while trying to " + 426 "construct BlockReaderLocal via " + pathInfo.getPath()); 427 } 428 throw exc; 429 } 430 if (info.getReplica() == null) { 431 if (LOG.isTraceEnabled()) { 432 PerformanceAdvisory.LOG.debug(this + ": failed to get " + 433 "ShortCircuitReplica. Cannot construct " + 434 "BlockReaderLocal via " + pathInfo.getPath()); 435 } 436 return null; 437 } 438 return new BlockReaderLocal.Builder(conf). 439 setFilename(fileName). 440 setBlock(block). 441 setStartOffset(startOffset). 442 setShortCircuitReplica(info.getReplica()). 443 setVerifyChecksum(verifyChecksum). 444 setCachingStrategy(cachingStrategy). 445 setStorageType(storageType). 446 build(); 447 } 448 449 /** 450 * Fetch a pair of short-circuit block descriptors from a local DataNode. 451 * 452 * @return Null if we could not communicate with the datanode, 453 * a new ShortCircuitReplicaInfo object otherwise. 454 * ShortCircuitReplicaInfo objects may contain either an InvalidToken 455 * exception, or a ShortCircuitReplica object ready to use. 456 */ 457 @Override 458 public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() { 459 if (createShortCircuitReplicaInfoCallback != null) { 460 ShortCircuitReplicaInfo info = 461 createShortCircuitReplicaInfoCallback.createShortCircuitReplicaInfo(); 462 if (info != null) return info; 463 } 464 if (LOG.isTraceEnabled()) { 465 LOG.trace(this + ": trying to create ShortCircuitReplicaInfo."); 466 } 467 BlockReaderPeer curPeer; 468 while (true) { 469 curPeer = nextDomainPeer(); 470 if (curPeer == null) break; 471 if (curPeer.fromCache) remainingCacheTries--; 472 DomainPeer peer = (DomainPeer)curPeer.peer; 473 Slot slot = null; 474 ShortCircuitCache cache = clientContext.getShortCircuitCache(); 475 try { 476 MutableBoolean usedPeer = new MutableBoolean(false); 477 slot = cache.allocShmSlot(datanode, peer, usedPeer, 478 new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()), 479 clientName); 480 if (usedPeer.booleanValue()) { 481 if (LOG.isTraceEnabled()) { 482 LOG.trace(this + ": allocShmSlot used up our previous socket " + 483 peer.getDomainSocket() + ". Allocating a new one..."); 484 } 485 curPeer = nextDomainPeer(); 486 if (curPeer == null) break; 487 peer = (DomainPeer)curPeer.peer; 488 } 489 ShortCircuitReplicaInfo info = requestFileDescriptors(peer, slot); 490 clientContext.getPeerCache().put(datanode, peer); 491 return info; 492 } catch (IOException e) { 493 if (slot != null) { 494 cache.freeSlot(slot); 495 } 496 if (curPeer.fromCache) { 497 // Handle an I/O error we got when using a cached socket. 498 // These are considered less serious, because the socket may be stale. 499 if (LOG.isDebugEnabled()) { 500 LOG.debug(this + ": closing stale domain peer " + peer, e); 501 } 502 IOUtils.cleanup(LOG, peer); 503 } else { 504 // Handle an I/O error we got when using a newly created socket. 505 // We temporarily disable the domain socket path for a few minutes in 506 // this case, to prevent wasting more time on it. 507 LOG.warn(this + ": I/O error requesting file descriptors. " + 508 "Disabling domain socket " + peer.getDomainSocket(), e); 509 IOUtils.cleanup(LOG, peer); 510 clientContext.getDomainSocketFactory() 511 .disableDomainSocketPath(pathInfo.getPath()); 512 return null; 513 } 514 } 515 } 516 return null; 517 } 518 519 /** 520 * Request file descriptors from a DomainPeer. 521 * 522 * @param peer The peer to use for communication. 523 * @param slot If non-null, the shared memory slot to associate with the 524 * new ShortCircuitReplica. 525 * 526 * @return A ShortCircuitReplica object if we could communicate with the 527 * datanode; null, otherwise. 528 * @throws IOException If we encountered an I/O exception while communicating 529 * with the datanode. 530 */ 531 private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer, 532 Slot slot) throws IOException { 533 ShortCircuitCache cache = clientContext.getShortCircuitCache(); 534 final DataOutputStream out = 535 new DataOutputStream(new BufferedOutputStream(peer.getOutputStream())); 536 SlotId slotId = slot == null ? null : slot.getSlotId(); 537 new Sender(out).requestShortCircuitFds(block, token, slotId, 1, 538 failureInjector.getSupportsReceiptVerification()); 539 DataInputStream in = new DataInputStream(peer.getInputStream()); 540 BlockOpResponseProto resp = BlockOpResponseProto.parseFrom( 541 PBHelper.vintPrefixed(in)); 542 DomainSocket sock = peer.getDomainSocket(); 543 failureInjector.injectRequestFileDescriptorsFailure(); 544 switch (resp.getStatus()) { 545 case SUCCESS: 546 byte buf[] = new byte[1]; 547 FileInputStream fis[] = new FileInputStream[2]; 548 sock.recvFileInputStreams(fis, buf, 0, buf.length); 549 ShortCircuitReplica replica = null; 550 try { 551 ExtendedBlockId key = 552 new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()); 553 if (buf[0] == USE_RECEIPT_VERIFICATION.getNumber()) { 554 LOG.trace("Sending receipt verification byte for slot " + slot); 555 sock.getOutputStream().write(0); 556 } 557 replica = new ShortCircuitReplica(key, fis[0], fis[1], cache, 558 Time.monotonicNow(), slot); 559 return new ShortCircuitReplicaInfo(replica); 560 } catch (IOException e) { 561 // This indicates an error reading from disk, or a format error. Since 562 // it's not a socket communication problem, we return null rather than 563 // throwing an exception. 564 LOG.warn(this + ": error creating ShortCircuitReplica.", e); 565 return null; 566 } finally { 567 if (replica == null) { 568 IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]); 569 } 570 } 571 case ERROR_UNSUPPORTED: 572 if (!resp.hasShortCircuitAccessVersion()) { 573 LOG.warn("short-circuit read access is disabled for " + 574 "DataNode " + datanode + ". reason: " + resp.getMessage()); 575 clientContext.getDomainSocketFactory() 576 .disableShortCircuitForPath(pathInfo.getPath()); 577 } else { 578 LOG.warn("short-circuit read access for the file " + 579 fileName + " is disabled for DataNode " + datanode + 580 ". reason: " + resp.getMessage()); 581 } 582 return null; 583 case ERROR_ACCESS_TOKEN: 584 String msg = "access control error while " + 585 "attempting to set up short-circuit access to " + 586 fileName + resp.getMessage(); 587 if (LOG.isDebugEnabled()) { 588 LOG.debug(this + ":" + msg); 589 } 590 return new ShortCircuitReplicaInfo(new InvalidToken(msg)); 591 default: 592 LOG.warn(this + ": unknown response code " + resp.getStatus() + 593 " while attempting to set up short-circuit access. " + 594 resp.getMessage()); 595 clientContext.getDomainSocketFactory() 596 .disableShortCircuitForPath(pathInfo.getPath()); 597 return null; 598 } 599 } 600 601 /** 602 * Get a RemoteBlockReader that communicates over a UNIX domain socket. 603 * 604 * @return The new BlockReader, or null if we failed to create the block 605 * reader. 606 * 607 * @throws InvalidToken If the block token was invalid. 608 * Potentially other security-related execptions. 609 */ 610 private BlockReader getRemoteBlockReaderFromDomain() throws IOException { 611 if (pathInfo == null) { 612 pathInfo = clientContext.getDomainSocketFactory(). 613 getPathInfo(inetSocketAddress, conf); 614 } 615 if (!pathInfo.getPathState().getUsableForDataTransfer()) { 616 PerformanceAdvisory.LOG.debug(this + ": not trying to create a " + 617 "remote block reader because the UNIX domain socket at " + 618 pathInfo + " is not usable."); 619 return null; 620 } 621 if (LOG.isTraceEnabled()) { 622 LOG.trace(this + ": trying to create a remote block reader from the " + 623 "UNIX domain socket at " + pathInfo.getPath()); 624 } 625 626 while (true) { 627 BlockReaderPeer curPeer = nextDomainPeer(); 628 if (curPeer == null) break; 629 if (curPeer.fromCache) remainingCacheTries--; 630 DomainPeer peer = (DomainPeer)curPeer.peer; 631 BlockReader blockReader = null; 632 try { 633 blockReader = getRemoteBlockReader(peer); 634 return blockReader; 635 } catch (IOException ioe) { 636 IOUtils.cleanup(LOG, peer); 637 if (isSecurityException(ioe)) { 638 if (LOG.isTraceEnabled()) { 639 LOG.trace(this + ": got security exception while constructing " + 640 "a remote block reader from the unix domain socket at " + 641 pathInfo.getPath(), ioe); 642 } 643 throw ioe; 644 } 645 if (curPeer.fromCache) { 646 // Handle an I/O error we got when using a cached peer. These are 647 // considered less serious, because the underlying socket may be stale. 648 if (LOG.isDebugEnabled()) { 649 LOG.debug("Closed potentially stale domain peer " + peer, ioe); 650 } 651 } else { 652 // Handle an I/O error we got when using a newly created domain peer. 653 // We temporarily disable the domain socket path for a few minutes in 654 // this case, to prevent wasting more time on it. 655 LOG.warn("I/O error constructing remote block reader. Disabling " + 656 "domain socket " + peer.getDomainSocket(), ioe); 657 clientContext.getDomainSocketFactory() 658 .disableDomainSocketPath(pathInfo.getPath()); 659 return null; 660 } 661 } finally { 662 if (blockReader == null) { 663 IOUtils.cleanup(LOG, peer); 664 } 665 } 666 } 667 return null; 668 } 669 670 /** 671 * Get a RemoteBlockReader that communicates over a TCP socket. 672 * 673 * @return The new BlockReader. We will not return null, but instead throw 674 * an exception if this fails. 675 * 676 * @throws InvalidToken 677 * If the block token was invalid. 678 * InvalidEncryptionKeyException 679 * If the encryption key was invalid. 680 * Other IOException 681 * If there was another problem. 682 */ 683 private BlockReader getRemoteBlockReaderFromTcp() throws IOException { 684 if (LOG.isTraceEnabled()) { 685 LOG.trace(this + ": trying to create a remote block reader from a " + 686 "TCP socket"); 687 } 688 BlockReader blockReader = null; 689 while (true) { 690 BlockReaderPeer curPeer = null; 691 Peer peer = null; 692 try { 693 curPeer = nextTcpPeer(); 694 if (curPeer == null) break; 695 if (curPeer.fromCache) remainingCacheTries--; 696 peer = curPeer.peer; 697 blockReader = getRemoteBlockReader(peer); 698 return blockReader; 699 } catch (IOException ioe) { 700 if (isSecurityException(ioe)) { 701 if (LOG.isTraceEnabled()) { 702 LOG.trace(this + ": got security exception while constructing " + 703 "a remote block reader from " + peer, ioe); 704 } 705 throw ioe; 706 } 707 if ((curPeer != null) && curPeer.fromCache) { 708 // Handle an I/O error we got when using a cached peer. These are 709 // considered less serious, because the underlying socket may be 710 // stale. 711 if (LOG.isDebugEnabled()) { 712 LOG.debug("Closed potentially stale remote peer " + peer, ioe); 713 } 714 } else { 715 // Handle an I/O error we got when using a newly created peer. 716 LOG.warn("I/O error constructing remote block reader.", ioe); 717 throw ioe; 718 } 719 } finally { 720 if (blockReader == null) { 721 IOUtils.cleanup(LOG, peer); 722 } 723 } 724 } 725 return null; 726 } 727 728 public static class BlockReaderPeer { 729 final Peer peer; 730 final boolean fromCache; 731 732 BlockReaderPeer(Peer peer, boolean fromCache) { 733 this.peer = peer; 734 this.fromCache = fromCache; 735 } 736 } 737 738 /** 739 * Get the next DomainPeer-- either from the cache or by creating it. 740 * 741 * @return the next DomainPeer, or null if we could not construct one. 742 */ 743 private BlockReaderPeer nextDomainPeer() { 744 if (remainingCacheTries > 0) { 745 Peer peer = clientContext.getPeerCache().get(datanode, true); 746 if (peer != null) { 747 if (LOG.isTraceEnabled()) { 748 LOG.trace("nextDomainPeer: reusing existing peer " + peer); 749 } 750 return new BlockReaderPeer(peer, true); 751 } 752 } 753 DomainSocket sock = clientContext.getDomainSocketFactory(). 754 createSocket(pathInfo, conf.socketTimeout); 755 if (sock == null) return null; 756 return new BlockReaderPeer(new DomainPeer(sock), false); 757 } 758 759 /** 760 * Get the next TCP-based peer-- either from the cache or by creating it. 761 * 762 * @return the next Peer, or null if we could not construct one. 763 * 764 * @throws IOException If there was an error while constructing the peer 765 * (such as an InvalidEncryptionKeyException) 766 */ 767 private BlockReaderPeer nextTcpPeer() throws IOException { 768 if (remainingCacheTries > 0) { 769 Peer peer = clientContext.getPeerCache().get(datanode, false); 770 if (peer != null) { 771 if (LOG.isTraceEnabled()) { 772 LOG.trace("nextTcpPeer: reusing existing peer " + peer); 773 } 774 return new BlockReaderPeer(peer, true); 775 } 776 } 777 try { 778 Peer peer = remotePeerFactory.newConnectedPeer(inetSocketAddress, token, 779 datanode); 780 if (LOG.isTraceEnabled()) { 781 LOG.trace("nextTcpPeer: created newConnectedPeer " + peer); 782 } 783 return new BlockReaderPeer(peer, false); 784 } catch (IOException e) { 785 if (LOG.isTraceEnabled()) { 786 LOG.trace("nextTcpPeer: failed to create newConnectedPeer " + 787 "connected to " + datanode); 788 } 789 throw e; 790 } 791 } 792 793 /** 794 * Determine if an exception is security-related. 795 * 796 * We need to handle these exceptions differently than other IOExceptions. 797 * They don't indicate a communication problem. Instead, they mean that there 798 * is some action the client needs to take, such as refetching block tokens, 799 * renewing encryption keys, etc. 800 * 801 * @param ioe The exception 802 * @return True only if the exception is security-related. 803 */ 804 private static boolean isSecurityException(IOException ioe) { 805 return (ioe instanceof InvalidToken) || 806 (ioe instanceof InvalidEncryptionKeyException) || 807 (ioe instanceof InvalidBlockTokenException) || 808 (ioe instanceof AccessControlException); 809 } 810 811 @SuppressWarnings("deprecation") 812 private BlockReader getRemoteBlockReader(Peer peer) throws IOException { 813 if (conf.useLegacyBlockReader) { 814 return RemoteBlockReader.newBlockReader(fileName, 815 block, token, startOffset, length, conf.ioBufferSize, 816 verifyChecksum, clientName, peer, datanode, 817 clientContext.getPeerCache(), cachingStrategy); 818 } else { 819 return RemoteBlockReader2.newBlockReader( 820 fileName, block, token, startOffset, length, 821 verifyChecksum, clientName, peer, datanode, 822 clientContext.getPeerCache(), cachingStrategy); 823 } 824 } 825 826 @Override 827 public String toString() { 828 return "BlockReaderFactory(fileName=" + fileName + ", block=" + block + ")"; 829 } 830 831 /** 832 * File name to print when accessing a block directly (from servlets) 833 * @param s Address of the block location 834 * @param poolId Block pool ID of the block 835 * @param blockId Block ID of the block 836 * @return string that has a file name for debug purposes 837 */ 838 public static String getFileName(final InetSocketAddress s, 839 final String poolId, final long blockId) { 840 return s.toString() + ":" + poolId + ":" + blockId; 841 } 842}