001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs; 019 020import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ShortCircuitFdResponse.USE_RECEIPT_VERIFICATION; 021 022import java.io.BufferedOutputStream; 023import java.io.DataInputStream; 024import java.io.DataOutputStream; 025import java.io.FileInputStream; 026import java.io.IOException; 027import java.net.InetSocketAddress; 028 029import org.apache.commons.lang.mutable.MutableBoolean; 030import org.apache.commons.logging.Log; 031import org.apache.commons.logging.LogFactory; 032import org.apache.hadoop.classification.InterfaceAudience; 033import org.apache.hadoop.conf.Configuration; 034import org.apache.hadoop.fs.StorageType; 035import org.apache.hadoop.hdfs.net.DomainPeer; 036import org.apache.hadoop.hdfs.net.Peer; 037import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 038import org.apache.hadoop.hdfs.protocol.ExtendedBlock; 039import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException; 040import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; 041import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto; 042import org.apache.hadoop.hdfs.protocolPB.PBHelper; 043import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; 044import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; 045import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; 046import org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory; 047import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache; 048import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.ShortCircuitReplicaCreator; 049import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica; 050import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplicaInfo; 051import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot; 052import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId; 053import org.apache.hadoop.io.IOUtils; 054import org.apache.hadoop.ipc.RemoteException; 055import org.apache.hadoop.net.unix.DomainSocket; 056import org.apache.hadoop.security.AccessControlException; 057import org.apache.hadoop.security.UserGroupInformation; 058import org.apache.hadoop.security.token.SecretManager.InvalidToken; 059import org.apache.hadoop.security.token.Token; 060import org.apache.hadoop.util.PerformanceAdvisory; 061import org.apache.hadoop.util.Time; 062 063import com.google.common.annotations.VisibleForTesting; 064import com.google.common.base.Preconditions; 065 066 067/** 068 * Utility class to create BlockReader implementations. 069 */ 070@InterfaceAudience.Private 071public class BlockReaderFactory implements ShortCircuitReplicaCreator { 072 static final Log LOG = LogFactory.getLog(BlockReaderFactory.class); 073 074 public static class FailureInjector { 075 public void injectRequestFileDescriptorsFailure() throws IOException { 076 // do nothing 077 } 078 } 079 080 @VisibleForTesting 081 static ShortCircuitReplicaCreator 082 createShortCircuitReplicaInfoCallback = null; 083 084 private final DFSClient.Conf conf; 085 086 /** 087 * Injects failures into specific operations during unit tests. 088 */ 089 private final FailureInjector failureInjector; 090 091 /** 092 * The file name, for logging and debugging purposes. 093 */ 094 private String fileName; 095 096 /** 097 * The block ID and block pool ID to use. 098 */ 099 private ExtendedBlock block; 100 101 /** 102 * The block token to use for security purposes. 103 */ 104 private Token<BlockTokenIdentifier> token; 105 106 /** 107 * The offset within the block to start reading at. 108 */ 109 private long startOffset; 110 111 /** 112 * If false, we won't try to verify the block checksum. 113 */ 114 private boolean verifyChecksum; 115 116 /** 117 * The name of this client. 118 */ 119 private String clientName; 120 121 /** 122 * The DataNode we're talking to. 123 */ 124 private DatanodeInfo datanode; 125 126 /** 127 * StorageType of replica on DataNode. 128 */ 129 private StorageType storageType; 130 131 /** 132 * If false, we won't try short-circuit local reads. 133 */ 134 private boolean allowShortCircuitLocalReads; 135 136 /** 137 * The ClientContext to use for things like the PeerCache. 138 */ 139 private ClientContext clientContext; 140 141 /** 142 * Number of bytes to read. -1 indicates no limit. 143 */ 144 private long length = -1; 145 146 /** 147 * Caching strategy to use when reading the block. 148 */ 149 private CachingStrategy cachingStrategy; 150 151 /** 152 * Socket address to use to connect to peer. 153 */ 154 private InetSocketAddress inetSocketAddress; 155 156 /** 157 * Remote peer factory to use to create a peer, if needed. 158 */ 159 private RemotePeerFactory remotePeerFactory; 160 161 /** 162 * UserGroupInformation to use for legacy block reader local objects, if needed. 163 */ 164 private UserGroupInformation userGroupInformation; 165 166 /** 167 * Configuration to use for legacy block reader local objects, if needed. 168 */ 169 private Configuration configuration; 170 171 /** 172 * Information about the domain socket path we should use to connect to the 173 * local peer-- or null if we haven't examined the local domain socket. 174 */ 175 private DomainSocketFactory.PathInfo pathInfo; 176 177 /** 178 * The remaining number of times that we'll try to pull a socket out of the 179 * cache. 180 */ 181 private int remainingCacheTries; 182 183 public BlockReaderFactory(DFSClient.Conf conf) { 184 this.conf = conf; 185 this.failureInjector = conf.brfFailureInjector; 186 this.remainingCacheTries = conf.nCachedConnRetry; 187 } 188 189 public BlockReaderFactory setFileName(String fileName) { 190 this.fileName = fileName; 191 return this; 192 } 193 194 public BlockReaderFactory setBlock(ExtendedBlock block) { 195 this.block = block; 196 return this; 197 } 198 199 public BlockReaderFactory setBlockToken(Token<BlockTokenIdentifier> token) { 200 this.token = token; 201 return this; 202 } 203 204 public BlockReaderFactory setStartOffset(long startOffset) { 205 this.startOffset = startOffset; 206 return this; 207 } 208 209 public BlockReaderFactory setVerifyChecksum(boolean verifyChecksum) { 210 this.verifyChecksum = verifyChecksum; 211 return this; 212 } 213 214 public BlockReaderFactory setClientName(String clientName) { 215 this.clientName = clientName; 216 return this; 217 } 218 219 public BlockReaderFactory setDatanodeInfo(DatanodeInfo datanode) { 220 this.datanode = datanode; 221 return this; 222 } 223 224 public BlockReaderFactory setStorageType(StorageType storageType) { 225 this.storageType = storageType; 226 return this; 227 } 228 229 public BlockReaderFactory setAllowShortCircuitLocalReads( 230 boolean allowShortCircuitLocalReads) { 231 this.allowShortCircuitLocalReads = allowShortCircuitLocalReads; 232 return this; 233 } 234 235 public BlockReaderFactory setClientCacheContext( 236 ClientContext clientContext) { 237 this.clientContext = clientContext; 238 return this; 239 } 240 241 public BlockReaderFactory setLength(long length) { 242 this.length = length; 243 return this; 244 } 245 246 public BlockReaderFactory setCachingStrategy( 247 CachingStrategy cachingStrategy) { 248 this.cachingStrategy = cachingStrategy; 249 return this; 250 } 251 252 public BlockReaderFactory setInetSocketAddress ( 253 InetSocketAddress inetSocketAddress) { 254 this.inetSocketAddress = inetSocketAddress; 255 return this; 256 } 257 258 public BlockReaderFactory setUserGroupInformation( 259 UserGroupInformation userGroupInformation) { 260 this.userGroupInformation = userGroupInformation; 261 return this; 262 } 263 264 public BlockReaderFactory setRemotePeerFactory( 265 RemotePeerFactory remotePeerFactory) { 266 this.remotePeerFactory = remotePeerFactory; 267 return this; 268 } 269 270 public BlockReaderFactory setConfiguration( 271 Configuration configuration) { 272 this.configuration = configuration; 273 return this; 274 } 275 276 /** 277 * Build a BlockReader with the given options. 278 * 279 * This function will do the best it can to create a block reader that meets 280 * all of our requirements. We prefer short-circuit block readers 281 * (BlockReaderLocal and BlockReaderLocalLegacy) over remote ones, since the 282 * former avoid the overhead of socket communication. If short-circuit is 283 * unavailable, our next fallback is data transfer over UNIX domain sockets, 284 * if dfs.client.domain.socket.data.traffic has been enabled. If that doesn't 285 * work, we will try to create a remote block reader that operates over TCP 286 * sockets. 287 * 288 * There are a few caches that are important here. 289 * 290 * The ShortCircuitCache stores file descriptor objects which have been passed 291 * from the DataNode. 292 * 293 * The DomainSocketFactory stores information about UNIX domain socket paths 294 * that we not been able to use in the past, so that we don't waste time 295 * retrying them over and over. (Like all the caches, it does have a timeout, 296 * though.) 297 * 298 * The PeerCache stores peers that we have used in the past. If we can reuse 299 * one of these peers, we avoid the overhead of re-opening a socket. However, 300 * if the socket has been timed out on the remote end, our attempt to reuse 301 * the socket may end with an IOException. For that reason, we limit our 302 * attempts at socket reuse to dfs.client.cached.conn.retry times. After 303 * that, we create new sockets. This avoids the problem where a thread tries 304 * to talk to a peer that it hasn't talked to in a while, and has to clean out 305 * every entry in a socket cache full of stale entries. 306 * 307 * @return The new BlockReader. We will not return null. 308 * 309 * @throws InvalidToken 310 * If the block token was invalid. 311 * InvalidEncryptionKeyException 312 * If the encryption key was invalid. 313 * Other IOException 314 * If there was another problem. 315 */ 316 public BlockReader build() throws IOException { 317 BlockReader reader = null; 318 319 Preconditions.checkNotNull(configuration); 320 if (conf.shortCircuitLocalReads && allowShortCircuitLocalReads) { 321 if (clientContext.getUseLegacyBlockReaderLocal()) { 322 reader = getLegacyBlockReaderLocal(); 323 if (reader != null) { 324 if (LOG.isTraceEnabled()) { 325 LOG.trace(this + ": returning new legacy block reader local."); 326 } 327 return reader; 328 } 329 } else { 330 reader = getBlockReaderLocal(); 331 if (reader != null) { 332 if (LOG.isTraceEnabled()) { 333 LOG.trace(this + ": returning new block reader local."); 334 } 335 return reader; 336 } 337 } 338 } 339 if (conf.domainSocketDataTraffic) { 340 reader = getRemoteBlockReaderFromDomain(); 341 if (reader != null) { 342 if (LOG.isTraceEnabled()) { 343 LOG.trace(this + ": returning new remote block reader using " + 344 "UNIX domain socket on " + pathInfo.getPath()); 345 } 346 return reader; 347 } 348 } 349 Preconditions.checkState(!DFSInputStream.tcpReadsDisabledForTesting, 350 "TCP reads were disabled for testing, but we failed to " + 351 "do a non-TCP read."); 352 return getRemoteBlockReaderFromTcp(); 353 } 354 355 /** 356 * Get {@link BlockReaderLocalLegacy} for short circuited local reads. 357 * This block reader implements the path-based style of local reads 358 * first introduced in HDFS-2246. 359 */ 360 private BlockReader getLegacyBlockReaderLocal() throws IOException { 361 if (LOG.isTraceEnabled()) { 362 LOG.trace(this + ": trying to construct BlockReaderLocalLegacy"); 363 } 364 if (!DFSClient.isLocalAddress(inetSocketAddress)) { 365 if (LOG.isTraceEnabled()) { 366 LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " + 367 "the address " + inetSocketAddress + " is not local"); 368 } 369 return null; 370 } 371 if (clientContext.getDisableLegacyBlockReaderLocal()) { 372 PerformanceAdvisory.LOG.debug(this + ": can't construct " + 373 "BlockReaderLocalLegacy because " + 374 "disableLegacyBlockReaderLocal is set."); 375 return null; 376 } 377 IOException ioe = null; 378 try { 379 return BlockReaderLocalLegacy.newBlockReader(conf, 380 userGroupInformation, configuration, fileName, block, token, 381 datanode, startOffset, length, storageType); 382 } catch (RemoteException remoteException) { 383 ioe = remoteException.unwrapRemoteException( 384 InvalidToken.class, AccessControlException.class); 385 } catch (IOException e) { 386 ioe = e; 387 } 388 if ((!(ioe instanceof AccessControlException)) && 389 isSecurityException(ioe)) { 390 // Handle security exceptions. 391 // We do not handle AccessControlException here, since 392 // BlockReaderLocalLegacy#newBlockReader uses that exception to indicate 393 // that the user is not in dfs.block.local-path-access.user, a condition 394 // which requires us to disable legacy SCR. 395 throw ioe; 396 } 397 LOG.warn(this + ": error creating legacy BlockReaderLocal. " + 398 "Disabling legacy local reads.", ioe); 399 clientContext.setDisableLegacyBlockReaderLocal(); 400 return null; 401 } 402 403 private BlockReader getBlockReaderLocal() throws InvalidToken { 404 if (LOG.isTraceEnabled()) { 405 LOG.trace(this + ": trying to construct a BlockReaderLocal " + 406 "for short-circuit reads."); 407 } 408 if (pathInfo == null) { 409 pathInfo = clientContext.getDomainSocketFactory(). 410 getPathInfo(inetSocketAddress, conf); 411 } 412 if (!pathInfo.getPathState().getUsableForShortCircuit()) { 413 PerformanceAdvisory.LOG.debug(this + ": " + pathInfo + " is not " + 414 "usable for short circuit; giving up on BlockReaderLocal."); 415 return null; 416 } 417 ShortCircuitCache cache = clientContext.getShortCircuitCache(); 418 ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()); 419 ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this); 420 InvalidToken exc = info.getInvalidTokenException(); 421 if (exc != null) { 422 if (LOG.isTraceEnabled()) { 423 LOG.trace(this + ": got InvalidToken exception while trying to " + 424 "construct BlockReaderLocal via " + pathInfo.getPath()); 425 } 426 throw exc; 427 } 428 if (info.getReplica() == null) { 429 if (LOG.isTraceEnabled()) { 430 PerformanceAdvisory.LOG.debug(this + ": failed to get " + 431 "ShortCircuitReplica. Cannot construct " + 432 "BlockReaderLocal via " + pathInfo.getPath()); 433 } 434 return null; 435 } 436 return new BlockReaderLocal.Builder(conf). 437 setFilename(fileName). 438 setBlock(block). 439 setStartOffset(startOffset). 440 setShortCircuitReplica(info.getReplica()). 441 setVerifyChecksum(verifyChecksum). 442 setCachingStrategy(cachingStrategy). 443 setStorageType(storageType). 444 build(); 445 } 446 447 /** 448 * Fetch a pair of short-circuit block descriptors from a local DataNode. 449 * 450 * @return Null if we could not communicate with the datanode, 451 * a new ShortCircuitReplicaInfo object otherwise. 452 * ShortCircuitReplicaInfo objects may contain either an InvalidToken 453 * exception, or a ShortCircuitReplica object ready to use. 454 */ 455 @Override 456 public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() { 457 if (createShortCircuitReplicaInfoCallback != null) { 458 ShortCircuitReplicaInfo info = 459 createShortCircuitReplicaInfoCallback.createShortCircuitReplicaInfo(); 460 if (info != null) return info; 461 } 462 if (LOG.isTraceEnabled()) { 463 LOG.trace(this + ": trying to create ShortCircuitReplicaInfo."); 464 } 465 BlockReaderPeer curPeer; 466 while (true) { 467 curPeer = nextDomainPeer(); 468 if (curPeer == null) break; 469 if (curPeer.fromCache) remainingCacheTries--; 470 DomainPeer peer = (DomainPeer)curPeer.peer; 471 Slot slot = null; 472 ShortCircuitCache cache = clientContext.getShortCircuitCache(); 473 try { 474 MutableBoolean usedPeer = new MutableBoolean(false); 475 slot = cache.allocShmSlot(datanode, peer, usedPeer, 476 new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()), 477 clientName); 478 if (usedPeer.booleanValue()) { 479 if (LOG.isTraceEnabled()) { 480 LOG.trace(this + ": allocShmSlot used up our previous socket " + 481 peer.getDomainSocket() + ". Allocating a new one..."); 482 } 483 curPeer = nextDomainPeer(); 484 if (curPeer == null) break; 485 peer = (DomainPeer)curPeer.peer; 486 } 487 ShortCircuitReplicaInfo info = requestFileDescriptors(peer, slot); 488 clientContext.getPeerCache().put(datanode, peer); 489 return info; 490 } catch (IOException e) { 491 if (slot != null) { 492 cache.freeSlot(slot); 493 } 494 if (curPeer.fromCache) { 495 // Handle an I/O error we got when using a cached socket. 496 // These are considered less serious, because the socket may be stale. 497 if (LOG.isDebugEnabled()) { 498 LOG.debug(this + ": closing stale domain peer " + peer, e); 499 } 500 IOUtils.cleanup(LOG, peer); 501 } else { 502 // Handle an I/O error we got when using a newly created socket. 503 // We temporarily disable the domain socket path for a few minutes in 504 // this case, to prevent wasting more time on it. 505 LOG.warn(this + ": I/O error requesting file descriptors. " + 506 "Disabling domain socket " + peer.getDomainSocket(), e); 507 IOUtils.cleanup(LOG, peer); 508 clientContext.getDomainSocketFactory() 509 .disableDomainSocketPath(pathInfo.getPath()); 510 return null; 511 } 512 } 513 } 514 return null; 515 } 516 517 /** 518 * Request file descriptors from a DomainPeer. 519 * 520 * @param peer The peer to use for communication. 521 * @param slot If non-null, the shared memory slot to associate with the 522 * new ShortCircuitReplica. 523 * 524 * @return A ShortCircuitReplica object if we could communicate with the 525 * datanode; null, otherwise. 526 * @throws IOException If we encountered an I/O exception while communicating 527 * with the datanode. 528 */ 529 private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer, 530 Slot slot) throws IOException { 531 ShortCircuitCache cache = clientContext.getShortCircuitCache(); 532 final DataOutputStream out = 533 new DataOutputStream(new BufferedOutputStream(peer.getOutputStream())); 534 SlotId slotId = slot == null ? null : slot.getSlotId(); 535 new Sender(out).requestShortCircuitFds(block, token, slotId, 1, true); 536 DataInputStream in = new DataInputStream(peer.getInputStream()); 537 BlockOpResponseProto resp = BlockOpResponseProto.parseFrom( 538 PBHelper.vintPrefixed(in)); 539 DomainSocket sock = peer.getDomainSocket(); 540 failureInjector.injectRequestFileDescriptorsFailure(); 541 switch (resp.getStatus()) { 542 case SUCCESS: 543 byte buf[] = new byte[1]; 544 FileInputStream fis[] = new FileInputStream[2]; 545 sock.recvFileInputStreams(fis, buf, 0, buf.length); 546 ShortCircuitReplica replica = null; 547 try { 548 ExtendedBlockId key = 549 new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()); 550 if (buf[0] == USE_RECEIPT_VERIFICATION.getNumber()) { 551 LOG.trace("Sending receipt verification byte for slot " + slot); 552 sock.getOutputStream().write(0); 553 } 554 replica = new ShortCircuitReplica(key, fis[0], fis[1], cache, 555 Time.monotonicNow(), slot); 556 return new ShortCircuitReplicaInfo(replica); 557 } catch (IOException e) { 558 // This indicates an error reading from disk, or a format error. Since 559 // it's not a socket communication problem, we return null rather than 560 // throwing an exception. 561 LOG.warn(this + ": error creating ShortCircuitReplica.", e); 562 return null; 563 } finally { 564 if (replica == null) { 565 IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]); 566 } 567 } 568 case ERROR_UNSUPPORTED: 569 if (!resp.hasShortCircuitAccessVersion()) { 570 LOG.warn("short-circuit read access is disabled for " + 571 "DataNode " + datanode + ". reason: " + resp.getMessage()); 572 clientContext.getDomainSocketFactory() 573 .disableShortCircuitForPath(pathInfo.getPath()); 574 } else { 575 LOG.warn("short-circuit read access for the file " + 576 fileName + " is disabled for DataNode " + datanode + 577 ". reason: " + resp.getMessage()); 578 } 579 return null; 580 case ERROR_ACCESS_TOKEN: 581 String msg = "access control error while " + 582 "attempting to set up short-circuit access to " + 583 fileName + resp.getMessage(); 584 if (LOG.isDebugEnabled()) { 585 LOG.debug(this + ":" + msg); 586 } 587 return new ShortCircuitReplicaInfo(new InvalidToken(msg)); 588 default: 589 LOG.warn(this + ": unknown response code " + resp.getStatus() + 590 " while attempting to set up short-circuit access. " + 591 resp.getMessage()); 592 clientContext.getDomainSocketFactory() 593 .disableShortCircuitForPath(pathInfo.getPath()); 594 return null; 595 } 596 } 597 598 /** 599 * Get a RemoteBlockReader that communicates over a UNIX domain socket. 600 * 601 * @return The new BlockReader, or null if we failed to create the block 602 * reader. 603 * 604 * @throws InvalidToken If the block token was invalid. 605 * Potentially other security-related execptions. 606 */ 607 private BlockReader getRemoteBlockReaderFromDomain() throws IOException { 608 if (pathInfo == null) { 609 pathInfo = clientContext.getDomainSocketFactory(). 610 getPathInfo(inetSocketAddress, conf); 611 } 612 if (!pathInfo.getPathState().getUsableForDataTransfer()) { 613 PerformanceAdvisory.LOG.debug(this + ": not trying to create a " + 614 "remote block reader because the UNIX domain socket at " + 615 pathInfo + " is not usable."); 616 return null; 617 } 618 if (LOG.isTraceEnabled()) { 619 LOG.trace(this + ": trying to create a remote block reader from the " + 620 "UNIX domain socket at " + pathInfo.getPath()); 621 } 622 623 while (true) { 624 BlockReaderPeer curPeer = nextDomainPeer(); 625 if (curPeer == null) break; 626 if (curPeer.fromCache) remainingCacheTries--; 627 DomainPeer peer = (DomainPeer)curPeer.peer; 628 BlockReader blockReader = null; 629 try { 630 blockReader = getRemoteBlockReader(peer); 631 return blockReader; 632 } catch (IOException ioe) { 633 IOUtils.cleanup(LOG, peer); 634 if (isSecurityException(ioe)) { 635 if (LOG.isTraceEnabled()) { 636 LOG.trace(this + ": got security exception while constructing " + 637 "a remote block reader from the unix domain socket at " + 638 pathInfo.getPath(), ioe); 639 } 640 throw ioe; 641 } 642 if (curPeer.fromCache) { 643 // Handle an I/O error we got when using a cached peer. These are 644 // considered less serious, because the underlying socket may be stale. 645 if (LOG.isDebugEnabled()) { 646 LOG.debug("Closed potentially stale domain peer " + peer, ioe); 647 } 648 } else { 649 // Handle an I/O error we got when using a newly created domain peer. 650 // We temporarily disable the domain socket path for a few minutes in 651 // this case, to prevent wasting more time on it. 652 LOG.warn("I/O error constructing remote block reader. Disabling " + 653 "domain socket " + peer.getDomainSocket(), ioe); 654 clientContext.getDomainSocketFactory() 655 .disableDomainSocketPath(pathInfo.getPath()); 656 return null; 657 } 658 } finally { 659 if (blockReader == null) { 660 IOUtils.cleanup(LOG, peer); 661 } 662 } 663 } 664 return null; 665 } 666 667 /** 668 * Get a RemoteBlockReader that communicates over a TCP socket. 669 * 670 * @return The new BlockReader. We will not return null, but instead throw 671 * an exception if this fails. 672 * 673 * @throws InvalidToken 674 * If the block token was invalid. 675 * InvalidEncryptionKeyException 676 * If the encryption key was invalid. 677 * Other IOException 678 * If there was another problem. 679 */ 680 private BlockReader getRemoteBlockReaderFromTcp() throws IOException { 681 if (LOG.isTraceEnabled()) { 682 LOG.trace(this + ": trying to create a remote block reader from a " + 683 "TCP socket"); 684 } 685 BlockReader blockReader = null; 686 while (true) { 687 BlockReaderPeer curPeer = null; 688 Peer peer = null; 689 try { 690 curPeer = nextTcpPeer(); 691 if (curPeer.fromCache) remainingCacheTries--; 692 peer = curPeer.peer; 693 blockReader = getRemoteBlockReader(peer); 694 return blockReader; 695 } catch (IOException ioe) { 696 if (isSecurityException(ioe)) { 697 if (LOG.isTraceEnabled()) { 698 LOG.trace(this + ": got security exception while constructing " + 699 "a remote block reader from " + peer, ioe); 700 } 701 throw ioe; 702 } 703 if ((curPeer != null) && curPeer.fromCache) { 704 // Handle an I/O error we got when using a cached peer. These are 705 // considered less serious, because the underlying socket may be 706 // stale. 707 if (LOG.isDebugEnabled()) { 708 LOG.debug("Closed potentially stale remote peer " + peer, ioe); 709 } 710 } else { 711 // Handle an I/O error we got when using a newly created peer. 712 LOG.warn("I/O error constructing remote block reader.", ioe); 713 throw ioe; 714 } 715 } finally { 716 if (blockReader == null) { 717 IOUtils.cleanup(LOG, peer); 718 } 719 } 720 } 721 } 722 723 public static class BlockReaderPeer { 724 final Peer peer; 725 final boolean fromCache; 726 727 BlockReaderPeer(Peer peer, boolean fromCache) { 728 this.peer = peer; 729 this.fromCache = fromCache; 730 } 731 } 732 733 /** 734 * Get the next DomainPeer-- either from the cache or by creating it. 735 * 736 * @return the next DomainPeer, or null if we could not construct one. 737 */ 738 private BlockReaderPeer nextDomainPeer() { 739 if (remainingCacheTries > 0) { 740 Peer peer = clientContext.getPeerCache().get(datanode, true); 741 if (peer != null) { 742 if (LOG.isTraceEnabled()) { 743 LOG.trace("nextDomainPeer: reusing existing peer " + peer); 744 } 745 return new BlockReaderPeer(peer, true); 746 } 747 } 748 DomainSocket sock = clientContext.getDomainSocketFactory(). 749 createSocket(pathInfo, conf.socketTimeout); 750 if (sock == null) return null; 751 return new BlockReaderPeer(new DomainPeer(sock), false); 752 } 753 754 /** 755 * Get the next TCP-based peer-- either from the cache or by creating it. 756 * 757 * @return the next Peer, or null if we could not construct one. 758 * 759 * @throws IOException If there was an error while constructing the peer 760 * (such as an InvalidEncryptionKeyException) 761 */ 762 private BlockReaderPeer nextTcpPeer() throws IOException { 763 if (remainingCacheTries > 0) { 764 Peer peer = clientContext.getPeerCache().get(datanode, false); 765 if (peer != null) { 766 if (LOG.isTraceEnabled()) { 767 LOG.trace("nextTcpPeer: reusing existing peer " + peer); 768 } 769 return new BlockReaderPeer(peer, true); 770 } 771 } 772 try { 773 Peer peer = remotePeerFactory.newConnectedPeer(inetSocketAddress, token, 774 datanode); 775 if (LOG.isTraceEnabled()) { 776 LOG.trace("nextTcpPeer: created newConnectedPeer " + peer); 777 } 778 return new BlockReaderPeer(peer, false); 779 } catch (IOException e) { 780 if (LOG.isTraceEnabled()) { 781 LOG.trace("nextTcpPeer: failed to create newConnectedPeer " + 782 "connected to " + datanode); 783 } 784 throw e; 785 } 786 } 787 788 /** 789 * Determine if an exception is security-related. 790 * 791 * We need to handle these exceptions differently than other IOExceptions. 792 * They don't indicate a communication problem. Instead, they mean that there 793 * is some action the client needs to take, such as refetching block tokens, 794 * renewing encryption keys, etc. 795 * 796 * @param ioe The exception 797 * @return True only if the exception is security-related. 798 */ 799 private static boolean isSecurityException(IOException ioe) { 800 return (ioe instanceof InvalidToken) || 801 (ioe instanceof InvalidEncryptionKeyException) || 802 (ioe instanceof InvalidBlockTokenException) || 803 (ioe instanceof AccessControlException); 804 } 805 806 @SuppressWarnings("deprecation") 807 private BlockReader getRemoteBlockReader(Peer peer) throws IOException { 808 if (conf.useLegacyBlockReader) { 809 return RemoteBlockReader.newBlockReader(fileName, 810 block, token, startOffset, length, conf.ioBufferSize, 811 verifyChecksum, clientName, peer, datanode, 812 clientContext.getPeerCache(), cachingStrategy); 813 } else { 814 return RemoteBlockReader2.newBlockReader( 815 fileName, block, token, startOffset, length, 816 verifyChecksum, clientName, peer, datanode, 817 clientContext.getPeerCache(), cachingStrategy); 818 } 819 } 820 821 @Override 822 public String toString() { 823 return "BlockReaderFactory(fileName=" + fileName + ", block=" + block + ")"; 824 } 825 826 /** 827 * File name to print when accessing a block directly (from servlets) 828 * @param s Address of the block location 829 * @param poolId Block pool ID of the block 830 * @param blockId Block ID of the block 831 * @return string that has a file name for debug purposes 832 */ 833 public static String getFileName(final InetSocketAddress s, 834 final String poolId, final long blockId) { 835 return s.toString() + ":" + poolId + ":" + blockId; 836 } 837}