/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import java.io.FileInputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.nio.ByteBuffer;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.ByteBufferReadable;
import org.apache.hadoop.fs.CanSetDropBehind;
import org.apache.hadoop.fs.CanSetReadahead;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.hdfs.net.DomainPeer;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.net.TcpPeerServer;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.unix.DomainSocket;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.token.Token;

import com.google.common.annotations.VisibleForTesting;

/****************************************************************
 * DFSInputStream provides bytes from a named file.  It handles
 * negotiation of the namenode and various datanodes as necessary.
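 *
 * A minimal usage sketch (illustrative only: the URI and path below are
 * assumptions, and applications normally obtain this stream through
 * FileSystem#open / DistributedFileSystem rather than constructing it
 * directly):
 *
 * <pre>
 *   Configuration conf = new Configuration();
 *   FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020/"), conf);
 *   FSDataInputStream in = fs.open(new Path("/user/example/data.bin"));
 *   try {
 *     byte[] buf = new byte[4096];
 *     int n;
 *     while ((n = in.read(buf, 0, buf.length)) > 0) {
 *       // process n bytes from buf
 *     }
 *   } finally {
 *     in.close();
 *   }
 * </pre>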
 ****************************************************************/
@InterfaceAudience.Private
public class DFSInputStream extends FSInputStream
    implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead {
  @VisibleForTesting
  static boolean tcpReadsDisabledForTesting = false;
  private final PeerCache peerCache;
  private final DFSClient dfsClient;
  private boolean closed = false;
  private final String src;
  private BlockReader blockReader = null;
  private final boolean verifyChecksum;
  private LocatedBlocks locatedBlocks = null;
  private long lastBlockBeingWrittenLength = 0;
  private DatanodeInfo currentNode = null;
  private LocatedBlock currentLocatedBlock = null;
  private long pos = 0;
  private long blockEnd = -1;
  private CachingStrategy cachingStrategy;
  private final ReadStatistics readStatistics = new ReadStatistics();

  public static class ReadStatistics {
    public ReadStatistics() {
      this.totalBytesRead = 0;
      this.totalLocalBytesRead = 0;
      this.totalShortCircuitBytesRead = 0;
    }

    public ReadStatistics(ReadStatistics rhs) {
      this.totalBytesRead = rhs.getTotalBytesRead();
      this.totalLocalBytesRead = rhs.getTotalLocalBytesRead();
      this.totalShortCircuitBytesRead = rhs.getTotalShortCircuitBytesRead();
    }

    /**
     * @return The total bytes read.  This will always be at least as
     * high as the other numbers, since it includes all of them.
     */
    public long getTotalBytesRead() {
      return totalBytesRead;
    }

    /**
     * @return The total local bytes read.  This will always be at least
     * as high as totalShortCircuitBytesRead, since all short-circuit
     * reads are also local.
     */
    public long getTotalLocalBytesRead() {
      return totalLocalBytesRead;
    }

    /**
     * @return The total short-circuit local bytes read.
     */
    public long getTotalShortCircuitBytesRead() {
      return totalShortCircuitBytesRead;
    }

    /**
     * @return The total number of bytes read which were not local.
     */
    public long getRemoteBytesRead() {
      return totalBytesRead - totalLocalBytesRead;
    }

    void addRemoteBytes(long amt) {
      this.totalBytesRead += amt;
    }

    void addLocalBytes(long amt) {
      this.totalBytesRead += amt;
      this.totalLocalBytesRead += amt;
    }

    void addShortCircuitBytes(long amt) {
      this.totalBytesRead += amt;
      this.totalLocalBytesRead += amt;
      this.totalShortCircuitBytesRead += amt;
    }

    private long totalBytesRead;

    private long totalLocalBytesRead;

    private long totalShortCircuitBytesRead;
  }
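
  // A short sketch of how these statistics can be inspected after reading
  // (illustrative only; "in" is assumed to be a DFSInputStream obtained
  // elsewhere, e.g. via DFSClient#open):
  //
  //   DFSInputStream.ReadStatistics stats = in.getReadStatistics();
  //   long remote = stats.getRemoteBytesRead();
  //   long shortCircuit = stats.getTotalShortCircuitBytesRead();
  //   // remote + local == total, and short-circuit reads are a subset of
  //   // local reads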

  private final FileInputStreamCache fileInputStreamCache;

  /**
   * This variable tracks the number of failures since the start of the
   * most recent user-facing operation. That is to say, it should be reset
   * whenever the user makes a call on this stream, and if at any point
   * during the retry logic, the failure count exceeds a threshold,
   * the errors will be thrown back to the operation.
   *
   * Specifically this counts the number of times the client has gone
   * back to the namenode to get a new list of block locations, and is
   * capped at maxBlockAcquireFailures
   */
  private int failures = 0;

  /* XXX Use of ConcurrentHashMap is a temporary fix. Parallel accesses
   * to DFSInputStream (through pthreads) still need to be fixed properly. */
  private final ConcurrentHashMap<DatanodeInfo, DatanodeInfo> deadNodes =
             new ConcurrentHashMap<DatanodeInfo, DatanodeInfo>();
  private int buffersize = 1;

  private final byte[] oneByteBuf = new byte[1]; // used for 'int read()'

  void addToDeadNodes(DatanodeInfo dnInfo) {
    deadNodes.put(dnInfo, dnInfo);
  }

  DFSInputStream(DFSClient dfsClient, String src, int buffersize, boolean verifyChecksum
                 ) throws IOException, UnresolvedLinkException {
    this.dfsClient = dfsClient;
    this.verifyChecksum = verifyChecksum;
    this.buffersize = buffersize;
    this.src = src;
    this.peerCache = dfsClient.peerCache;
    this.fileInputStreamCache = new FileInputStreamCache(
        dfsClient.getConf().shortCircuitStreamsCacheSize,
        dfsClient.getConf().shortCircuitStreamsCacheExpiryMs);
    this.cachingStrategy =
        dfsClient.getDefaultReadCachingStrategy().duplicate();
    openInfo();
  }

  /**
   * Grab the open-file info from namenode
   */
  synchronized void openInfo() throws IOException, UnresolvedLinkException {
    lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength();
    int retriesForLastBlockLength = 3;
    while (retriesForLastBlockLength > 0) {
      // Getting last block length as -1 is a special case. When cluster
      // restarts, DNs may not report immediately. At this time partial block
      // locations will not be available with NN for getting the length. Let's
      // retry 3 times to get the length.
      if (lastBlockBeingWrittenLength == -1) {
        DFSClient.LOG.warn("Last block locations not available. "
            + "Datanodes might not have reported blocks completely."
            + " Will retry for " + retriesForLastBlockLength + " times");
        waitFor(4000);
        lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength();
      } else {
        break;
      }
      retriesForLastBlockLength--;
    }
    if (retriesForLastBlockLength == 0) {
      throw new IOException("Could not obtain the last block locations.");
    }
  }

  private void waitFor(int waitTime) throws IOException {
    try {
      Thread.sleep(waitTime);
    } catch (InterruptedException e) {
      throw new IOException(
          "Interrupted while getting the last block length.");
    }
  }

  private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException {
    final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0);
    if (DFSClient.LOG.isDebugEnabled()) {
      DFSClient.LOG.debug("newInfo = " + newInfo);
    }
    if (newInfo == null) {
      throw new IOException("Cannot open filename " + src);
    }

    if (locatedBlocks != null) {
      Iterator<LocatedBlock> oldIter = locatedBlocks.getLocatedBlocks().iterator();
      Iterator<LocatedBlock> newIter = newInfo.getLocatedBlocks().iterator();
      while (oldIter.hasNext() && newIter.hasNext()) {
        if (!oldIter.next().getBlock().equals(newIter.next().getBlock())) {
          throw new IOException("Blocklist for " + src + " has changed!");
        }
      }
    }
    locatedBlocks = newInfo;
    long lastBlockBeingWrittenLength = 0;
    if (!locatedBlocks.isLastBlockComplete()) {
      final LocatedBlock last = locatedBlocks.getLastLocatedBlock();
      if (last != null) {
        if (last.getLocations().length == 0) {
          return -1;
        }
        final long len = readBlockLength(last);
        last.getBlock().setNumBytes(len);
        lastBlockBeingWrittenLength = len;
      }
    }

    currentNode = null;
    return lastBlockBeingWrittenLength;
  }

  /** Read the block length from one of the datanodes. */
  private long readBlockLength(LocatedBlock locatedblock) throws IOException {
    assert locatedblock != null : "LocatedBlock cannot be null";
    int replicaNotFoundCount = locatedblock.getLocations().length;

    for(DatanodeInfo datanode : locatedblock.getLocations()) {
      ClientDatanodeProtocol cdp = null;

      try {
        cdp = DFSUtil.createClientDatanodeProtocolProxy(datanode,
            dfsClient.getConfiguration(), dfsClient.getConf().socketTimeout,
            dfsClient.getConf().connectToDnViaHostname, locatedblock);

        final long n = cdp.getReplicaVisibleLength(locatedblock.getBlock());

        if (n >= 0) {
          return n;
        }
      }
      catch(IOException ioe) {
        if (ioe instanceof RemoteException &&
            (((RemoteException) ioe).unwrapRemoteException() instanceof
              ReplicaNotFoundException)) {
          // special case : replica might not be on the DN, treat as 0 length
          replicaNotFoundCount--;
        }

        if (DFSClient.LOG.isDebugEnabled()) {
          DFSClient.LOG.debug("Failed to getReplicaVisibleLength from datanode "
              + datanode + " for block " + locatedblock.getBlock(), ioe);
        }
      } finally {
        if (cdp != null) {
          RPC.stopProxy(cdp);
        }
      }
    }

    // The namenode told us about these locations, but none of the datanodes
    // knows about the replica. That means we hit the race between pipeline
    // creation start and end. We require every location to have reported
    // ReplicaNotFoundException, because some other exception could have
    // happened on a DN that does have the replica, and we want to report
    // that error instead.
    if (replicaNotFoundCount == 0) {
      return 0;
    }

    throw new IOException("Cannot obtain block length for " + locatedblock);
  }

  public synchronized long getFileLength() {
    return locatedBlocks == null? 0:
        locatedBlocks.getFileLength() + lastBlockBeingWrittenLength;
  }

  // Short circuit local reads are forbidden for files that are
  // under construction.  See HDFS-2757.
  synchronized boolean shortCircuitForbidden() {
    return locatedBlocks.isUnderConstruction();
  }

  /**
   * Returns the datanode from which the stream is currently reading.
   */
  public DatanodeInfo getCurrentDatanode() {
    return currentNode;
  }

  /**
   * Returns the block containing the target position.
   */
  synchronized public ExtendedBlock getCurrentBlock() {
    if (currentLocatedBlock == null){
      return null;
    }
    return currentLocatedBlock.getBlock();
  }

  /**
   * Return the collection of blocks that have already been located.
   */
  public synchronized List<LocatedBlock> getAllBlocks() throws IOException {
    return getBlockRange(0, getFileLength());
  }

  /**
   * Get block at the specified position.
   * Fetch it from the namenode if not cached.
   *
   * @param offset offset in the file; the block containing it is returned
   * @param updatePosition whether to update current position
   * @return located block
   * @throws IOException
   */
  private synchronized LocatedBlock getBlockAt(long offset,
      boolean updatePosition) throws IOException {
    assert (locatedBlocks != null) : "locatedBlocks is null";

    final LocatedBlock blk;

    //check offset
    if (offset < 0 || offset >= getFileLength()) {
      throw new IOException("offset < 0 || offset >= getFileLength(), offset="
          + offset
          + ", updatePosition=" + updatePosition
          + ", locatedBlocks=" + locatedBlocks);
    }
    else if (offset >= locatedBlocks.getFileLength()) {
      // offset to the portion of the last block,
      // which is not known to the name-node yet;
      // getting the last block
      blk = locatedBlocks.getLastLocatedBlock();
    }
    else {
      // search cached blocks first
      int targetBlockIdx = locatedBlocks.findBlock(offset);
      if (targetBlockIdx < 0) { // block is not cached
        targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
        // fetch more blocks
        final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
        assert (newBlocks != null) : "Could not find target position " + offset;
        locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
      }
      blk = locatedBlocks.get(targetBlockIdx);
    }

    // update current position
    if (updatePosition) {
      pos = offset;
      blockEnd = blk.getStartOffset() + blk.getBlockSize() - 1;
      currentLocatedBlock = blk;
    }
    return blk;
  }

  /** Fetch a block from namenode and cache it */
  private synchronized void fetchBlockAt(long offset) throws IOException {
    int targetBlockIdx = locatedBlocks.findBlock(offset);
    if (targetBlockIdx < 0) { // block is not cached
      targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
    }
    // fetch blocks
    final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
    if (newBlocks == null) {
      throw new IOException("Could not find target position " + offset);
    }
    locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
  }
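
  // Worked example of the offset-to-block mapping used above (illustrative
  // only; the 128 MB block size is an assumption, not a value read from the
  // configuration): for a 300 MB file with 128 MB blocks, getBlockAt(0) and
  // getBlockAt(100 MB) both resolve to block index 0, getBlockAt(200 MB)
  // resolves to block index 1, and getBlockAt(290 MB) resolves to the last,
  // possibly still-being-written block. A subsequent getBlockRange(100 MB,
  // 150 MB) would therefore span block indices 0 and 1.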

  /**
   * Get blocks in the specified range.
   * Fetch them from the namenode if not cached. This function
   * will not get a read request beyond the EOF.
   * @param offset starting offset in the file
   * @param length length of the range
   * @return consecutive segment of located blocks
   * @throws IOException
   */
  private synchronized List<LocatedBlock> getBlockRange(long offset,
      long length)  throws IOException {
    // getFileLength(): returns total file length
    // locatedBlocks.getFileLength(): returns length of completed blocks
    if (offset >= getFileLength()) {
      throw new IOException("Offset: " + offset +
        " exceeds file length: " + getFileLength());
    }

    final List<LocatedBlock> blocks;
    final long lengthOfCompleteBlk = locatedBlocks.getFileLength();
    final boolean readOffsetWithinCompleteBlk = offset < lengthOfCompleteBlk;
    final boolean readLengthPastCompleteBlk = offset + length > lengthOfCompleteBlk;

    if (readOffsetWithinCompleteBlk) {
      //get the blocks of finalized (completed) block range
      blocks = getFinalizedBlockRange(offset,
        Math.min(length, lengthOfCompleteBlk - offset));
    } else {
      blocks = new ArrayList<LocatedBlock>(1);
    }

    // get the blocks from incomplete block range
    if (readLengthPastCompleteBlk) {
      blocks.add(locatedBlocks.getLastLocatedBlock());
    }

    return blocks;
  }

  /**
   * Get blocks in the specified range.
   * Includes only the complete blocks.
   * Fetch them from the namenode if not cached.
   */
  private synchronized List<LocatedBlock> getFinalizedBlockRange(
      long offset, long length) throws IOException {
    assert (locatedBlocks != null) : "locatedBlocks is null";
    List<LocatedBlock> blockRange = new ArrayList<LocatedBlock>();
    // search cached blocks first
    int blockIdx = locatedBlocks.findBlock(offset);
    if (blockIdx < 0) { // block is not cached
      blockIdx = LocatedBlocks.getInsertIndex(blockIdx);
    }
    long remaining = length;
    long curOff = offset;
    while(remaining > 0) {
      LocatedBlock blk = null;
      if(blockIdx < locatedBlocks.locatedBlockCount())
        blk = locatedBlocks.get(blockIdx);
      if (blk == null || curOff < blk.getStartOffset()) {
        LocatedBlocks newBlocks;
        newBlocks = dfsClient.getLocatedBlocks(src, curOff, remaining);
        locatedBlocks.insertRange(blockIdx, newBlocks.getLocatedBlocks());
        continue;
      }
      assert curOff >= blk.getStartOffset() : "Block not found";
      blockRange.add(blk);
      long bytesRead = blk.getStartOffset() + blk.getBlockSize() - curOff;
      remaining -= bytesRead;
      curOff += bytesRead;
      blockIdx++;
    }
    return blockRange;
  }

  /**
   * Open a DataInputStream to a DataNode so that it can be read from.
   * We get block ID and the IDs of the destinations at startup, from the namenode.
   */
  private synchronized DatanodeInfo blockSeekTo(long target) throws IOException {
    if (target >= getFileLength()) {
      throw new IOException("Attempted to read past end of file");
    }

    // Will be getting a new BlockReader.
    if (blockReader != null) {
      blockReader.close();
      blockReader = null;
    }

    //
    // Connect to best DataNode for desired Block, with potential offset
    //
    DatanodeInfo chosenNode = null;
    int refetchToken = 1; // only need to get a new access token once
    int refetchEncryptionKey = 1; // only need to get a new encryption key once

    boolean connectFailedOnce = false;

    while (true) {
      //
      // Compute desired block
      //
      LocatedBlock targetBlock = getBlockAt(target, true);
      assert (target==pos) : "Wrong position " + pos + " expect " + target;
      long offsetIntoBlock = target - targetBlock.getStartOffset();

      DNAddrPair retval = chooseDataNode(targetBlock);
      chosenNode = retval.info;
      InetSocketAddress targetAddr = retval.addr;

      try {
        ExtendedBlock blk = targetBlock.getBlock();
        Token<BlockTokenIdentifier> accessToken = targetBlock.getBlockToken();
        blockReader = getBlockReader(targetAddr, chosenNode, src, blk,
            accessToken, offsetIntoBlock, blk.getNumBytes() - offsetIntoBlock,
            buffersize, verifyChecksum, dfsClient.clientName);
        if(connectFailedOnce) {
          DFSClient.LOG.info("Successfully connected to " + targetAddr +
                             " for " + blk);
        }
        return chosenNode;
      } catch (AccessControlException ex) {
        DFSClient.LOG.warn("Short circuit access failed " + ex);
        dfsClient.disableLegacyBlockReaderLocal();
        continue;
      } catch (IOException ex) {
        if (ex instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
          DFSClient.LOG.info("Will fetch a new encryption key and retry, "
              + "encryption key was invalid when connecting to " + targetAddr
              + " : " + ex);
          // The encryption key used is invalid.
          refetchEncryptionKey--;
          dfsClient.clearDataEncryptionKey();
        } else if (ex instanceof InvalidBlockTokenException && refetchToken > 0) {
          DFSClient.LOG.info("Will fetch a new access token and retry, "
              + "access token was invalid when connecting to " + targetAddr
              + " : " + ex);
          /*
           * Get a new access token and retry. Retry is needed in 2 cases. 1)
           * When both NN and DN re-started while DFSClient holding a cached
           * access token. 2) In the case that NN fails to update its
           * access key at pre-set interval (by a wide margin) and
           * subsequently restarts. In this case, DN re-registers itself with
           * NN and receives a new access key, but DN will delete the old
           * access key from its memory since it's considered expired based on
           * the estimated expiration date.
           */
          refetchToken--;
          fetchBlockAt(target);
        } else {
          connectFailedOnce = true;
          DFSClient.LOG.warn("Failed to connect to " + targetAddr + " for block"
            + ", add to deadNodes and continue. " + ex, ex);
          // Put chosen node into dead list, continue
          addToDeadNodes(chosenNode);
        }
      }
    }
  }

  /**
   * Close it down!
   */
  @Override
  public synchronized void close() throws IOException {
    if (closed) {
      return;
    }
    dfsClient.checkOpen();

    if (blockReader != null) {
      blockReader.close();
      blockReader = null;
    }
    super.close();
    fileInputStreamCache.close();
    closed = true;
  }

  @Override
  public synchronized int read() throws IOException {
    int ret = read( oneByteBuf, 0, 1 );
    return ( ret <= 0 ) ? -1 : (oneByteBuf[0] & 0xff);
  }

  /**
   * Wraps different possible read implementations so that readBuffer can be
   * strategy-agnostic.
   */
  private interface ReaderStrategy {
    public int doRead(BlockReader blockReader, int off, int len,
        ReadStatistics readStatistics) throws ChecksumException, IOException;
  }

  private static void updateReadStatistics(ReadStatistics readStatistics,
        int nRead, BlockReader blockReader) {
    if (nRead <= 0) return;
    if (blockReader.isShortCircuit()) {
      readStatistics.totalBytesRead += nRead;
      readStatistics.totalLocalBytesRead += nRead;
      readStatistics.totalShortCircuitBytesRead += nRead;
    } else if (blockReader.isLocal()) {
      readStatistics.totalBytesRead += nRead;
      readStatistics.totalLocalBytesRead += nRead;
    } else {
      readStatistics.totalBytesRead += nRead;
    }
  }

  /**
   * Used to read bytes into a byte[]
   */
  private static class ByteArrayStrategy implements ReaderStrategy {
    final byte[] buf;

    public ByteArrayStrategy(byte[] buf) {
      this.buf = buf;
    }

    @Override
    public int doRead(BlockReader blockReader, int off, int len,
        ReadStatistics readStatistics) throws ChecksumException, IOException {
      int nRead = blockReader.read(buf, off, len);
      updateReadStatistics(readStatistics, nRead, blockReader);
      return nRead;
    }
  }

  /**
   * Used to read bytes into a user-supplied ByteBuffer
   */
  private static class ByteBufferStrategy implements ReaderStrategy {
    final ByteBuffer buf;
    ByteBufferStrategy(ByteBuffer buf) {
      this.buf = buf;
    }

    @Override
    public int doRead(BlockReader blockReader, int off, int len,
        ReadStatistics readStatistics) throws ChecksumException, IOException {
      int oldpos = buf.position();
      int oldlimit = buf.limit();
      boolean success = false;
      try {
        int ret = blockReader.read(buf);
        success = true;
        updateReadStatistics(readStatistics, ret, blockReader);
        return ret;
      } finally {
        if (!success) {
          // Reset to original state so that retries work correctly.
          buf.position(oldpos);
          buf.limit(oldlimit);
        }
      }
    }
  }
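
  // A brief sketch of how the two strategies above surface to callers
  // (illustrative only; "in" is assumed to be an open DFSInputStream):
  //
  //   byte[] arr = new byte[8192];
  //   int n1 = in.read(arr, 0, arr.length);   // uses ByteArrayStrategy
  //
  //   ByteBuffer bb = ByteBuffer.allocate(8192);
  //   int n2 = in.read(bb);                   // uses ByteBufferStrategy
  //
  // Both paths funnel through readWithStrategy()/readBuffer(), so the retry
  // and checksum handling below is shared.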

  /* This is used by regular read() and handles ChecksumExceptions.
   * name readBuffer() is chosen to imply similarity to readBuffer() in
   * ChecksumFileSystem
   */
  private synchronized int readBuffer(ReaderStrategy reader, int off, int len,
      Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap)
      throws IOException {
    IOException ioe;

    /* we retry current node only once. So this is set to true only here.
     * Intention is to handle one common case of an error that is not a
     * failure on datanode or client : when DataNode closes the connection
     * since client is idle. If there are other cases of "non-errors" then
     * a datanode might be retried by setting this to true again.
     */
    boolean retryCurrentNode = true;

    while (true) {
      // retry as many times as seekToNewSource allows.
      try {
        return reader.doRead(blockReader, off, len, readStatistics);
      } catch ( ChecksumException ce ) {
        DFSClient.LOG.warn("Found Checksum error for "
            + getCurrentBlock() + " from " + currentNode
            + " at " + ce.getPos());
        ioe = ce;
        retryCurrentNode = false;
        // we want to remember which block replicas we have tried
        addIntoCorruptedBlockMap(getCurrentBlock(), currentNode,
            corruptedBlockMap);
      } catch ( IOException e ) {
        if (!retryCurrentNode) {
          DFSClient.LOG.warn("Exception while reading from "
              + getCurrentBlock() + " of " + src + " from "
              + currentNode, e);
        }
        ioe = e;
      }
      boolean sourceFound = false;
      if (retryCurrentNode) {
        /* possibly retry the same node so that transient errors don't
         * result in application level failures (e.g. Datanode could have
         * closed the connection because the client is idle for too long).
         */
        sourceFound = seekToBlockSource(pos);
      } else {
        addToDeadNodes(currentNode);
        sourceFound = seekToNewSource(pos);
      }
      if (!sourceFound) {
        throw ioe;
      }
      retryCurrentNode = false;
    }
  }

  private int readWithStrategy(ReaderStrategy strategy, int off, int len) throws IOException {
    dfsClient.checkOpen();
    if (closed) {
      throw new IOException("Stream closed");
    }
    Map<ExtendedBlock,Set<DatanodeInfo>> corruptedBlockMap
      = new HashMap<ExtendedBlock, Set<DatanodeInfo>>();
    failures = 0;
    if (pos < getFileLength()) {
      int retries = 2;
      while (retries > 0) {
        try {
          // currentNode can be left as null if previous read had a checksum
          // error on the same block. See HDFS-3067
          if (pos > blockEnd || currentNode == null) {
            currentNode = blockSeekTo(pos);
          }
          int realLen = (int) Math.min(len, (blockEnd - pos + 1L));
          int result = readBuffer(strategy, off, realLen, corruptedBlockMap);

          if (result >= 0) {
            pos += result;
          } else {
            // got an EOS from reader though we expect more data on it.
            throw new IOException("Unexpected EOS from the reader");
          }
          if (dfsClient.stats != null && result != -1) {
            dfsClient.stats.incrementBytesRead(result);
          }
          return result;
        } catch (ChecksumException ce) {
          throw ce;
        } catch (IOException e) {
          if (retries == 1) {
            DFSClient.LOG.warn("DFS Read", e);
          }
          blockEnd = -1;
          if (currentNode != null) { addToDeadNodes(currentNode); }
          if (--retries == 0) {
            throw e;
          }
        } finally {
          // Check if we need to report block replica corruption, whether the
          // read was successful or a ChecksumException occurred.
          reportCheckSumFailure(corruptedBlockMap,
              currentLocatedBlock.getLocations().length);
        }
      }
    }
    return -1;
  }

  /**
   * Read the entire buffer.
   */
  @Override
  public synchronized int read(final byte buf[], int off, int len) throws IOException {
    ReaderStrategy byteArrayReader = new ByteArrayStrategy(buf);

    return readWithStrategy(byteArrayReader, off, len);
  }

  @Override
  public synchronized int read(final ByteBuffer buf) throws IOException {
    ReaderStrategy byteBufferReader = new ByteBufferStrategy(buf);

    return readWithStrategy(byteBufferReader, 0, buf.remaining());
  }


  /**
   * Add corrupted block replica into map.
   * @param corruptedBlockMap map recording, for each block, the datanodes on
   *          which corrupted replicas were found
   */
  private void addIntoCorruptedBlockMap(ExtendedBlock blk, DatanodeInfo node,
      Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap) {
    Set<DatanodeInfo> dnSet = null;
    if((corruptedBlockMap.containsKey(blk))) {
      dnSet = corruptedBlockMap.get(blk);
    } else {
      dnSet = new HashSet<DatanodeInfo>();
    }
    if (!dnSet.contains(node)) {
      dnSet.add(node);
      corruptedBlockMap.put(blk, dnSet);
    }
  }

  private DNAddrPair chooseDataNode(LocatedBlock block)
    throws IOException {
    while (true) {
      DatanodeInfo[] nodes = block.getLocations();
      try {
        DatanodeInfo chosenNode = bestNode(nodes, deadNodes);
        final String dnAddr =
            chosenNode.getXferAddr(dfsClient.getConf().connectToDnViaHostname);
        if (DFSClient.LOG.isDebugEnabled()) {
          DFSClient.LOG.debug("Connecting to datanode " + dnAddr);
        }
        InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr);
        return new DNAddrPair(chosenNode, targetAddr);
      } catch (IOException ie) {
        String blockInfo = block.getBlock() + " file=" + src;
        if (failures >= dfsClient.getMaxBlockAcquireFailures()) {
          throw new BlockMissingException(src, "Could not obtain block: " + blockInfo,
                                          block.getStartOffset());
        }

        if (nodes == null || nodes.length == 0) {
          DFSClient.LOG.info("No node available for " + blockInfo);
        }
        DFSClient.LOG.info("Could not obtain " + block.getBlock()
            + " from any node: " + ie
            + ". Will get new block locations from namenode and retry...");
        try {
          // Introducing a random factor to the wait time before another retry.
          // The wait time is dependent on # of failures and a random factor.
          // At the first time of getting a BlockMissingException, the wait time
          // is a random number between 0..3000 ms. If the first retry
          // still fails, we will wait 3000 ms grace period before the 2nd retry.
          // Also at the second retry, the waiting window is expanded to 6000 ms
          // alleviating the request rate from the server. Similarly the 3rd retry
          // will wait 6000 ms grace period before retry and the waiting window is
          // expanded to 9000 ms.
          final int timeWindow = dfsClient.getConf().timeWindow;
          double waitTime = timeWindow * failures +       // grace period for the last round of attempt
            timeWindow * (failures + 1) * DFSUtil.getRandom().nextDouble(); // expanding time window for each failure
          DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1) + " IOException, will wait for " + waitTime + " msec.");
          Thread.sleep((long)waitTime);
        } catch (InterruptedException iex) {
        }
        deadNodes.clear(); //2nd option is to remove only nodes[blockId]
        openInfo();
        block = getBlockAt(block.getStartOffset(), false);
        failures++;
        continue;
      }
    }
  }
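
  // Worked example of the retry backoff in chooseDataNode() (illustrative
  // only; 3000 ms is the usual default for dfs.client.retry.window.base, but
  // the actual value comes from the configuration): with timeWindow = 3000,
  // the first retry waits a random 0..3000 ms, the second waits a fixed
  // 3000 ms grace period plus a random 0..6000 ms, and the third waits
  // 6000 ms plus a random 0..9000 ms, so the load on the namenode and
  // datanodes backs off as failures accumulate.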

  private void fetchBlockByteRange(LocatedBlock block, long start, long end,
                                   byte[] buf, int offset,
                                   Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap)
                                   throws IOException {
    //
    // Connect to best DataNode for desired Block, with potential offset
    //
    int refetchToken = 1; // only need to get a new access token once
    int refetchEncryptionKey = 1; // only need to get a new encryption key once

    while (true) {
      // cached block locations may have been updated by chooseDataNode()
      // or fetchBlockAt(). Always get the latest list of locations at the
      // start of the loop.
      block = getBlockAt(block.getStartOffset(), false);
      DNAddrPair retval = chooseDataNode(block);
      DatanodeInfo chosenNode = retval.info;
      InetSocketAddress targetAddr = retval.addr;
      BlockReader reader = null;

      try {
        Token<BlockTokenIdentifier> blockToken = block.getBlockToken();

        int len = (int) (end - start + 1);
        reader = getBlockReader(targetAddr, chosenNode, src, block.getBlock(),
            blockToken, start, len, buffersize, verifyChecksum,
            dfsClient.clientName);
        int nread = reader.readAll(buf, offset, len);
        if (nread != len) {
          throw new IOException("truncated return from reader.read(): " +
                                "expected " + len + ", got " + nread);
        }
        return;
      } catch (ChecksumException e) {
        DFSClient.LOG.warn("fetchBlockByteRange(). Got a checksum exception for " +
                 src + " at " + block.getBlock() + ":" +
                 e.getPos() + " from " + chosenNode);
        // we want to remember what we have tried
        addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap);
      } catch (AccessControlException ex) {
        DFSClient.LOG.warn("Short circuit access failed " + ex);
        dfsClient.disableLegacyBlockReaderLocal();
        continue;
      } catch (IOException e) {
        if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
          DFSClient.LOG.info("Will fetch a new encryption key and retry, "
              + "encryption key was invalid when connecting to " + targetAddr
              + " : " + e);
          // The encryption key used is invalid.
          refetchEncryptionKey--;
          dfsClient.clearDataEncryptionKey();
        } else if (e instanceof InvalidBlockTokenException && refetchToken > 0) {
          DFSClient.LOG.info("Will get a new access token and retry, "
              + "access token was invalid when connecting to " + targetAddr
              + " : " + e);
          refetchToken--;
          fetchBlockAt(block.getStartOffset());
          continue;
        } else {
          DFSClient.LOG.warn("Failed to connect to " + targetAddr +
              " for file " + src + " for block " + block.getBlock() + ":" + e);
          if (DFSClient.LOG.isDebugEnabled()) {
            DFSClient.LOG.debug("Connection failure ", e);
          }
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
      // Put chosen node into dead list, continue
      addToDeadNodes(chosenNode);
    }
  }

  private Peer newTcpPeer(InetSocketAddress addr) throws IOException {
    Peer peer = null;
    boolean success = false;
    Socket sock = null;
    try {
      sock = dfsClient.socketFactory.createSocket();
      NetUtils.connect(sock, addr,
        dfsClient.getRandomLocalInterfaceAddr(),
        dfsClient.getConf().socketTimeout);
      peer = TcpPeerServer.peerFromSocketAndKey(sock,
          dfsClient.getDataEncryptionKey());
      success = true;
      return peer;
    } finally {
      if (!success) {
        IOUtils.closeQuietly(peer);
        IOUtils.closeQuietly(sock);
      }
    }
  }
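
  // Rough sketch of the reader-selection order implemented in
  // getBlockReader() below (a summary of the existing code, not an
  // additional code path): cached local file descriptors (short-circuit)
  // -> legacy BlockReaderLocal -> cached domain-socket peers -> a new
  // domain-socket peer -> cached TCP peers -> a new TCP connection to the
  // datanode.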

  /**
   * Retrieve a BlockReader suitable for reading.
   * This method will reuse the cached connection to the DN if appropriate.
   * Otherwise, it will create a new connection.
   * Throwing an IOException from this method is basically equivalent to
   * declaring the DataNode bad, so we try to connect a lot of different ways
   * before doing that.
   *
   * @param dnAddr  Address of the datanode
   * @param chosenNode Chosen datanode information
   * @param file  File location
   * @param block  The Block object
   * @param blockToken  The access token for security
   * @param startOffset  The read offset, relative to block head
   * @param len  The number of bytes to read
   * @param bufferSize  The IO buffer size (not the client buffer size)
   * @param verifyChecksum  Whether to verify checksum
   * @param clientName  Client name
   * @return New BlockReader instance
   */
  protected BlockReader getBlockReader(InetSocketAddress dnAddr,
                                       DatanodeInfo chosenNode,
                                       String file,
                                       ExtendedBlock block,
                                       Token<BlockTokenIdentifier> blockToken,
                                       long startOffset,
                                       long len,
                                       int bufferSize,
                                       boolean verifyChecksum,
                                       String clientName)
      throws IOException {
    // Firstly, we check to see if we have cached any file descriptors for
    // local blocks.  If so, we can just re-use those file descriptors.
    FileInputStream fis[] = fileInputStreamCache.get(chosenNode, block);
    if (fis != null) {
      if (DFSClient.LOG.isDebugEnabled()) {
        DFSClient.LOG.debug("got FileInputStreams for " + block + " from " +
            "the FileInputStreamCache.");
      }
      return new BlockReaderLocal(dfsClient.getConf(), file,
        block, startOffset, len, fis[0], fis[1], chosenNode, verifyChecksum,
        fileInputStreamCache);
    }

    // If the legacy local block reader is enabled and we are reading a local
    // block, try to create a BlockReaderLocalLegacy.  The legacy local block
    // reader implements local reads in the style first introduced by HDFS-2246.
    if ((dfsClient.useLegacyBlockReaderLocal()) &&
        DFSClient.isLocalAddress(dnAddr) &&
        (!shortCircuitForbidden())) {
      try {
        return BlockReaderFactory.getLegacyBlockReaderLocal(dfsClient,
            clientName, block, blockToken, chosenNode, startOffset);
      } catch (IOException e) {
        DFSClient.LOG.warn("error creating legacy BlockReaderLocal.  " +
            "Disabling legacy local reads.", e);
        dfsClient.disableLegacyBlockReaderLocal();
      }
    }

    // Look for cached domain peers.
    int cacheTries = 0;
    DomainSocketFactory dsFactory = dfsClient.getDomainSocketFactory();
    BlockReader reader = null;
    final int nCachedConnRetry = dfsClient.getConf().nCachedConnRetry;
    for (; cacheTries < nCachedConnRetry; ++cacheTries) {
      Peer peer = peerCache.get(chosenNode, true);
      if (peer == null) break;
      try {
        boolean allowShortCircuitLocalReads = dfsClient.getConf().
            shortCircuitLocalReads && (!shortCircuitForbidden());
        reader = BlockReaderFactory.newBlockReader(
            dfsClient.getConf(), file, block, blockToken, startOffset,
            len, verifyChecksum, clientName, peer, chosenNode,
            dsFactory, peerCache, fileInputStreamCache,
            allowShortCircuitLocalReads, cachingStrategy);
        return reader;
      } catch (IOException ex) {
        DFSClient.LOG.debug("Error making BlockReader with DomainSocket. " +
            "Closing stale " + peer, ex);
      } finally {
        if (reader == null) {
          IOUtils.closeQuietly(peer);
        }
      }
    }

    // Try to create a DomainPeer.
    DomainSocket domSock = dsFactory.create(dnAddr, this);
    if (domSock != null) {
      Peer peer = new DomainPeer(domSock);
      try {
        boolean allowShortCircuitLocalReads = dfsClient.getConf().
            shortCircuitLocalReads && (!shortCircuitForbidden());
        reader = BlockReaderFactory.newBlockReader(
            dfsClient.getConf(), file, block, blockToken, startOffset,
            len, verifyChecksum, clientName, peer, chosenNode,
            dsFactory, peerCache, fileInputStreamCache,
            allowShortCircuitLocalReads, cachingStrategy);
        return reader;
      } catch (IOException e) {
        DFSClient.LOG.warn("failed to connect to " + domSock, e);
      } finally {
        if (reader == null) {
          // If the Peer that we got the error from was a DomainPeer,
          // mark the socket path as bad, so that newDataSocket will not try
          // to re-open this socket for a while.
          dsFactory.disableDomainSocketPath(domSock.getPath());
          IOUtils.closeQuietly(peer);
        }
      }
    }

    // Look for cached peers.
    for (; cacheTries < nCachedConnRetry; ++cacheTries) {
      Peer peer = peerCache.get(chosenNode, false);
      if (peer == null) break;
      try {
        reader = BlockReaderFactory.newBlockReader(
            dfsClient.getConf(), file, block, blockToken, startOffset,
            len, verifyChecksum, clientName, peer, chosenNode,
            dsFactory, peerCache, fileInputStreamCache, false,
            cachingStrategy);
        return reader;
      } catch (IOException ex) {
        DFSClient.LOG.debug("Error making BlockReader. Closing stale " +
          peer, ex);
      } finally {
        if (reader == null) {
          IOUtils.closeQuietly(peer);
        }
      }
    }
    if (tcpReadsDisabledForTesting) {
      throw new IOException("TCP reads are disabled.");
    }
    // Try to create a new remote peer.
    Peer peer = newTcpPeer(dnAddr);
    return BlockReaderFactory.newBlockReader(
        dfsClient.getConf(), file, block, blockToken, startOffset,
        len, verifyChecksum, clientName, peer, chosenNode,
        dsFactory, peerCache, fileInputStreamCache, false,
        cachingStrategy);
  }


  /**
   * Read bytes starting from the specified position.
   *
   * @param position start read from this position
   * @param buffer read buffer
   * @param offset offset into buffer
   * @param length number of bytes to read
   *
   * @return actual number of bytes read
   */
  @Override
  public int read(long position, byte[] buffer, int offset, int length)
    throws IOException {
    // sanity checks
    dfsClient.checkOpen();
    if (closed) {
      throw new IOException("Stream closed");
    }
    failures = 0;
    long filelen = getFileLength();
    if ((position < 0) || (position >= filelen)) {
      return -1;
    }
    int realLen = length;
    if ((position + length) > filelen) {
      realLen = (int)(filelen - position);
    }

    // determine the block and byte range within the block
    // corresponding to position and realLen
    List<LocatedBlock> blockRange = getBlockRange(position, realLen);
    int remaining = realLen;
    Map<ExtendedBlock,Set<DatanodeInfo>> corruptedBlockMap
      = new HashMap<ExtendedBlock, Set<DatanodeInfo>>();
    for (LocatedBlock blk : blockRange) {
      long targetStart = position - blk.getStartOffset();
      long bytesToRead = Math.min(remaining, blk.getBlockSize() - targetStart);
      try {
        fetchBlockByteRange(blk, targetStart,
            targetStart + bytesToRead - 1, buffer, offset, corruptedBlockMap);
      } finally {
        // Check and report if any block replicas are corrupted.
        // BlockMissingException may be caught if all block replicas are
        // corrupted.
        reportCheckSumFailure(corruptedBlockMap, blk.getLocations().length);
      }

      remaining -= bytesToRead;
      position += bytesToRead;
      offset += bytesToRead;
    }
    assert remaining == 0 : "Wrong number of bytes read.";
    if (dfsClient.stats != null) {
      dfsClient.stats.incrementBytesRead(realLen);
    }
    return realLen;
  }
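
  // A short usage sketch for the positional read above (illustrative only;
  // the offsets and buffer size are arbitrary). Unlike the streaming read()
  // methods, this positional read does not update the stream position
  // ("pos" is left untouched):
  //
  //   byte[] header = new byte[1024];
  //   int n = in.read(0L, header, 0, header.length); // read the file header
  //   long posBefore = in.getPos();                   // unchanged by the call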

  /**
   * DFSInputStream reports checksum failure.
   * Case I : client has tried multiple data nodes and at least one of the
   * attempts has succeeded. We report the other failures as corrupted block to
   * namenode.
   * Case II: client has tried out all data nodes, but all failed. We
   * only report if the total number of replicas is 1. We do not
   * report otherwise, since the failures may be due to the client itself
   * being unable to read rather than to actual corruption.
   * @param corruptedBlockMap map of corrupted blocks
   * @param dataNodeCount number of data nodes who contains the block replicas
   */
  private void reportCheckSumFailure(
      Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap,
      int dataNodeCount) {
    if (corruptedBlockMap.isEmpty()) {
      return;
    }
    Iterator<Entry<ExtendedBlock, Set<DatanodeInfo>>> it = corruptedBlockMap
        .entrySet().iterator();
    Entry<ExtendedBlock, Set<DatanodeInfo>> entry = it.next();
    ExtendedBlock blk = entry.getKey();
    Set<DatanodeInfo> dnSet = entry.getValue();
    if (((dnSet.size() < dataNodeCount) && (dnSet.size() > 0))
        || ((dataNodeCount == 1) && (dnSet.size() == dataNodeCount))) {
      DatanodeInfo[] locs = new DatanodeInfo[dnSet.size()];
      int i = 0;
      for (DatanodeInfo dn:dnSet) {
        locs[i++] = dn;
      }
      LocatedBlock [] lblocks = { new LocatedBlock(blk, locs) };
      dfsClient.reportChecksumFailure(src, lblocks);
    }
    corruptedBlockMap.clear();
  }

  @Override
  public long skip(long n) throws IOException {
    if ( n > 0 ) {
      long curPos = getPos();
      long fileLen = getFileLength();
      if( n+curPos > fileLen ) {
        n = fileLen - curPos;
      }
      seek(curPos+n);
      return n;
    }
    return n < 0 ? -1 : 0;
  }

  /**
   * Seek to a new arbitrary location
   */
  @Override
  public synchronized void seek(long targetPos) throws IOException {
    if (targetPos > getFileLength()) {
      throw new IOException("Cannot seek after EOF");
    }
    if (targetPos < 0) {
      throw new IOException("Cannot seek to negative offset");
    }
    if (closed) {
      throw new IOException("Stream is closed!");
    }
    boolean done = false;
    if (pos <= targetPos && targetPos <= blockEnd) {
      //
      // If this seek is to a positive position in the current
      // block, and this piece of data might already be lying in
      // the TCP buffer, then just eat up the intervening data.
      //
      int diff = (int)(targetPos - pos);
      if (diff <= blockReader.available()) {
        try {
          pos += blockReader.skip(diff);
          if (pos == targetPos) {
            done = true;
          }
        } catch (IOException e) { // make the following read retry
          if(DFSClient.LOG.isDebugEnabled()) {
            DFSClient.LOG.debug("Exception while seek to " + targetPos
                + " from " + getCurrentBlock() + " of " + src + " from "
                + currentNode, e);
          }
        }
      }
    }
    if (!done) {
      pos = targetPos;
      blockEnd = -1;
    }
  }

  /**
   * Same as {@link #seekToNewSource(long)} except that it does not exclude
   * the current datanode and might connect to the same node.
   */
  private synchronized boolean seekToBlockSource(long targetPos)
                                                 throws IOException {
    currentNode = blockSeekTo(targetPos);
    return true;
  }

  /**
   * Seek to given position on a node other than the current node.  If
   * a node other than the current node is found, then returns true.
   * If another node could not be found, then returns false.
   */
  @Override
  public synchronized boolean seekToNewSource(long targetPos) throws IOException {
    boolean markedDead = deadNodes.containsKey(currentNode);
    addToDeadNodes(currentNode);
    DatanodeInfo oldNode = currentNode;
    DatanodeInfo newNode = blockSeekTo(targetPos);
    if (!markedDead) {
      /* remove it from deadNodes. blockSeekTo could have cleared
       * deadNodes and added currentNode again. That's OK. */
      deadNodes.remove(oldNode);
    }
    if (!oldNode.getStorageID().equals(newNode.getStorageID())) {
      currentNode = newNode;
      return true;
    } else {
      return false;
    }
  }

  /**
   * Return the current position in the stream.
   */
  @Override
  public synchronized long getPos() throws IOException {
    return pos;
  }

  /** Return the size of the remaining available bytes
   * if the size is less than or equal to {@link Integer#MAX_VALUE},
   * otherwise, return {@link Integer#MAX_VALUE}.
   */
  @Override
  public synchronized int available() throws IOException {
    if (closed) {
      throw new IOException("Stream closed");
    }

    final long remaining = getFileLength() - pos;
    return remaining <= Integer.MAX_VALUE? (int)remaining: Integer.MAX_VALUE;
  }

  /**
   * We definitely don't support marks
   */
  @Override
  public boolean markSupported() {
    return false;
  }
  @Override
  public void mark(int readLimit) {
  }
  @Override
  public void reset() throws IOException {
    throw new IOException("Mark/reset not supported");
  }

  /**
   * Pick the best node from which to stream the data.
   * Entries in <i>nodes</i> are already in the priority order
   */
  static DatanodeInfo bestNode(DatanodeInfo nodes[],
                               AbstractMap<DatanodeInfo, DatanodeInfo> deadNodes)
                               throws IOException {
    if (nodes != null) {
      for (int i = 0; i < nodes.length; i++) {
        if (!deadNodes.containsKey(nodes[i])) {
          return nodes[i];
        }
      }
    }
    throw new IOException("No live nodes contain current block");
  }

  /** Utility class to encapsulate data node info and its address.
   */
  static class DNAddrPair {
    DatanodeInfo info;
    InetSocketAddress addr;
    DNAddrPair(DatanodeInfo info, InetSocketAddress addr) {
      this.info = info;
      this.addr = addr;
    }
  }

  /**
   * Get statistics about the reads which this DFSInputStream has done.
   */
  public synchronized ReadStatistics getReadStatistics() {
    return new ReadStatistics(readStatistics);
  }

  private synchronized void closeCurrentBlockReader() {
    if (blockReader == null) return;
    // Close the current block reader so that the new caching settings can
    // take effect immediately.
    try {
      blockReader.close();
    } catch (IOException e) {
      DFSClient.LOG.error("error closing blockReader", e);
    }
    blockReader = null;
  }

  @Override
  public synchronized void setReadahead(Long readahead)
      throws IOException {
    this.cachingStrategy.setReadahead(readahead);
    closeCurrentBlockReader();
  }

  @Override
  public synchronized void setDropBehind(Boolean dropBehind)
      throws IOException {
    this.cachingStrategy.setDropBehind(dropBehind);
    closeCurrentBlockReader();
  }
}