001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with this 004 * work for additional information regarding copyright ownership. The ASF 005 * licenses this file to you under the Apache License, Version 2.0 (the 006 * "License"); you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations under 015 * the License. 016 */ 017 018 package org.apache.hadoop.io.file.tfile; 019 020 import java.io.ByteArrayInputStream; 021 import java.io.Closeable; 022 import java.io.DataInput; 023 import java.io.DataInputStream; 024 import java.io.DataOutput; 025 import java.io.DataOutputStream; 026 import java.io.EOFException; 027 import java.io.IOException; 028 import java.io.OutputStream; 029 import java.util.ArrayList; 030 import java.util.Comparator; 031 032 import org.apache.commons.logging.Log; 033 import org.apache.commons.logging.LogFactory; 034 import org.apache.hadoop.classification.InterfaceAudience; 035 import org.apache.hadoop.classification.InterfaceStability; 036 import org.apache.hadoop.conf.Configuration; 037 import org.apache.hadoop.fs.FSDataInputStream; 038 import org.apache.hadoop.fs.FSDataOutputStream; 039 import org.apache.hadoop.io.BoundedByteArrayOutputStream; 040 import org.apache.hadoop.io.BytesWritable; 041 import org.apache.hadoop.io.DataInputBuffer; 042 import org.apache.hadoop.io.DataOutputBuffer; 043 import org.apache.hadoop.io.IOUtils; 044 import org.apache.hadoop.io.RawComparator; 045 import org.apache.hadoop.io.WritableComparator; 046 import org.apache.hadoop.io.file.tfile.BCFile.Reader.BlockReader; 047 import org.apache.hadoop.io.file.tfile.BCFile.Writer.BlockAppender; 048 import org.apache.hadoop.io.file.tfile.Chunk.ChunkDecoder; 049 import org.apache.hadoop.io.file.tfile.Chunk.ChunkEncoder; 050 import org.apache.hadoop.io.file.tfile.CompareUtils.BytesComparator; 051 import org.apache.hadoop.io.file.tfile.CompareUtils.MemcmpRawComparator; 052 import org.apache.hadoop.io.file.tfile.Utils.Version; 053 import org.apache.hadoop.io.serializer.JavaSerializationComparator; 054 055 /** 056 * A TFile is a container of key-value pairs. Both keys and values are type-less 057 * bytes. Keys are restricted to 64KB, value length is not restricted 058 * (practically limited to the available disk storage). TFile further provides 059 * the following features: 060 * <ul> 061 * <li>Block Compression. 062 * <li>Named meta data blocks. 063 * <li>Sorted or unsorted keys. 064 * <li>Seek by key or by file offset. 065 * </ul> 066 * The memory footprint of a TFile includes the following: 067 * <ul> 068 * <li>Some constant overhead of reading or writing a compressed block. 069 * <ul> 070 * <li>Each compressed block requires one compression/decompression codec for 071 * I/O. 072 * <li>Temporary space to buffer the key. 073 * <li>Temporary space to buffer the value (for TFile.Writer only). Values are 074 * chunk encoded, so that we buffer at most one chunk of user data. By default, 075 * the chunk buffer is 1MB. Reading chunked value does not require additional 076 * memory. 077 * </ul> 078 * <li>TFile index, which is proportional to the total number of Data Blocks. 079 * The total amount of memory needed to hold the index can be estimated as 080 * (56+AvgKeySize)*NumBlocks. 081 * <li>MetaBlock index, which is proportional to the total number of Meta 082 * Blocks.The total amount of memory needed to hold the index for Meta Blocks 083 * can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. 084 * </ul> 085 * <p> 086 * The behavior of TFile can be customized by the following variables through 087 * Configuration: 088 * <ul> 089 * <li><b>tfile.io.chunk.size</b>: Value chunk size. Integer (in bytes). Default 090 * to 1MB. Values of the length less than the chunk size is guaranteed to have 091 * known value length in read time (See 092 * {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). 093 * <li><b>tfile.fs.output.buffer.size</b>: Buffer size used for 094 * FSDataOutputStream. Integer (in bytes). Default to 256KB. 095 * <li><b>tfile.fs.input.buffer.size</b>: Buffer size used for 096 * FSDataInputStream. Integer (in bytes). Default to 256KB. 097 * </ul> 098 * <p> 099 * Suggestions on performance optimization. 100 * <ul> 101 * <li>Minimum block size. We recommend a setting of minimum block size between 102 * 256KB to 1MB for general usage. Larger block size is preferred if files are 103 * primarily for sequential access. However, it would lead to inefficient random 104 * access (because there are more data to decompress). Smaller blocks are good 105 * for random access, but require more memory to hold the block index, and may 106 * be slower to create (because we must flush the compressor stream at the 107 * conclusion of each data block, which leads to an FS I/O flush). Further, due 108 * to the internal caching in Compression codec, the smallest possible block 109 * size would be around 20KB-30KB. 110 * <li>The current implementation does not offer true multi-threading for 111 * reading. The implementation uses FSDataInputStream seek()+read(), which is 112 * shown to be much faster than positioned-read call in single thread mode. 113 * However, it also means that if multiple threads attempt to access the same 114 * TFile (using multiple scanners) simultaneously, the actual I/O is carried out 115 * sequentially even if they access different DFS blocks. 116 * <li>Compression codec. Use "none" if the data is not very compressable (by 117 * compressable, I mean a compression ratio at least 2:1). Generally, use "lzo" 118 * as the starting point for experimenting. "gz" overs slightly better 119 * compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to 120 * decompress, comparing to "lzo". 121 * <li>File system buffering, if the underlying FSDataInputStream and 122 * FSDataOutputStream is already adequately buffered; or if applications 123 * reads/writes keys and values in large buffers, we can reduce the sizes of 124 * input/output buffering in TFile layer by setting the configuration parameters 125 * "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". 126 * </ul> 127 * 128 * Some design rationale behind TFile can be found at <a 129 * href=https://issues.apache.org/jira/browse/HADOOP-3315>Hadoop-3315</a>. 130 */ 131 @InterfaceAudience.Public 132 @InterfaceStability.Evolving 133 public class TFile { 134 static final Log LOG = LogFactory.getLog(TFile.class); 135 136 private static final String CHUNK_BUF_SIZE_ATTR = "tfile.io.chunk.size"; 137 private static final String FS_INPUT_BUF_SIZE_ATTR = 138 "tfile.fs.input.buffer.size"; 139 private static final String FS_OUTPUT_BUF_SIZE_ATTR = 140 "tfile.fs.output.buffer.size"; 141 142 static int getChunkBufferSize(Configuration conf) { 143 int ret = conf.getInt(CHUNK_BUF_SIZE_ATTR, 1024 * 1024); 144 return (ret > 0) ? ret : 1024 * 1024; 145 } 146 147 static int getFSInputBufferSize(Configuration conf) { 148 return conf.getInt(FS_INPUT_BUF_SIZE_ATTR, 256 * 1024); 149 } 150 151 static int getFSOutputBufferSize(Configuration conf) { 152 return conf.getInt(FS_OUTPUT_BUF_SIZE_ATTR, 256 * 1024); 153 } 154 155 private static final int MAX_KEY_SIZE = 64 * 1024; // 64KB 156 static final Version API_VERSION = new Version((short) 1, (short) 0); 157 158 /** compression: gzip */ 159 public static final String COMPRESSION_GZ = "gz"; 160 /** compression: lzo */ 161 public static final String COMPRESSION_LZO = "lzo"; 162 /** compression: none */ 163 public static final String COMPRESSION_NONE = "none"; 164 /** comparator: memcmp */ 165 public static final String COMPARATOR_MEMCMP = "memcmp"; 166 /** comparator prefix: java class */ 167 public static final String COMPARATOR_JCLASS = "jclass:"; 168 169 /** 170 * Make a raw comparator from a string name. 171 * 172 * @param name 173 * Comparator name 174 * @return A RawComparable comparator. 175 */ 176 static public Comparator<RawComparable> makeComparator(String name) { 177 return TFileMeta.makeComparator(name); 178 } 179 180 // Prevent the instantiation of TFiles 181 private TFile() { 182 // nothing 183 } 184 185 /** 186 * Get names of supported compression algorithms. The names are acceptable by 187 * TFile.Writer. 188 * 189 * @return Array of strings, each represents a supported compression 190 * algorithm. Currently, the following compression algorithms are 191 * supported. 192 * <ul> 193 * <li>"none" - No compression. 194 * <li>"lzo" - LZO compression. 195 * <li>"gz" - GZIP compression. 196 * </ul> 197 */ 198 public static String[] getSupportedCompressionAlgorithms() { 199 return Compression.getSupportedAlgorithms(); 200 } 201 202 /** 203 * TFile Writer. 204 */ 205 @InterfaceStability.Evolving 206 public static class Writer implements Closeable { 207 // minimum compressed size for a block. 208 private final int sizeMinBlock; 209 210 // Meta blocks. 211 final TFileIndex tfileIndex; 212 final TFileMeta tfileMeta; 213 214 // reference to the underlying BCFile. 215 private BCFile.Writer writerBCF; 216 217 // current data block appender. 218 BlockAppender blkAppender; 219 long blkRecordCount; 220 221 // buffers for caching the key. 222 BoundedByteArrayOutputStream currentKeyBufferOS; 223 BoundedByteArrayOutputStream lastKeyBufferOS; 224 225 // buffer used by chunk codec 226 private byte[] valueBuffer; 227 228 /** 229 * Writer states. The state always transits in circles: READY -> IN_KEY -> 230 * END_KEY -> IN_VALUE -> READY. 231 */ 232 private enum State { 233 READY, // Ready to start a new key-value pair insertion. 234 IN_KEY, // In the middle of key insertion. 235 END_KEY, // Key insertion complete, ready to insert value. 236 IN_VALUE, // In value insertion. 237 // ERROR, // Error encountered, cannot continue. 238 CLOSED, // TFile already closed. 239 }; 240 241 // current state of Writer. 242 State state = State.READY; 243 Configuration conf; 244 long errorCount = 0; 245 246 /** 247 * Constructor 248 * 249 * @param fsdos 250 * output stream for writing. Must be at position 0. 251 * @param minBlockSize 252 * Minimum compressed block size in bytes. A compression block will 253 * not be closed until it reaches this size except for the last 254 * block. 255 * @param compressName 256 * Name of the compression algorithm. Must be one of the strings 257 * returned by {@link TFile#getSupportedCompressionAlgorithms()}. 258 * @param comparator 259 * Leave comparator as null or empty string if TFile is not sorted. 260 * Otherwise, provide the string name for the comparison algorithm 261 * for keys. Two kinds of comparators are supported. 262 * <ul> 263 * <li>Algorithmic comparator: binary comparators that is language 264 * independent. Currently, only "memcmp" is supported. 265 * <li>Language-specific comparator: binary comparators that can 266 * only be constructed in specific language. For Java, the syntax 267 * is "jclass:", followed by the class name of the RawComparator. 268 * Currently, we only support RawComparators that can be 269 * constructed through the default constructor (with no 270 * parameters). Parameterized RawComparators such as 271 * {@link WritableComparator} or 272 * {@link JavaSerializationComparator} may not be directly used. 273 * One should write a wrapper class that inherits from such classes 274 * and use its default constructor to perform proper 275 * initialization. 276 * </ul> 277 * @param conf 278 * The configuration object. 279 * @throws IOException 280 */ 281 public Writer(FSDataOutputStream fsdos, int minBlockSize, 282 String compressName, String comparator, Configuration conf) 283 throws IOException { 284 sizeMinBlock = minBlockSize; 285 tfileMeta = new TFileMeta(comparator); 286 tfileIndex = new TFileIndex(tfileMeta.getComparator()); 287 288 writerBCF = new BCFile.Writer(fsdos, compressName, conf); 289 currentKeyBufferOS = new BoundedByteArrayOutputStream(MAX_KEY_SIZE); 290 lastKeyBufferOS = new BoundedByteArrayOutputStream(MAX_KEY_SIZE); 291 this.conf = conf; 292 } 293 294 /** 295 * Close the Writer. Resources will be released regardless of the exceptions 296 * being thrown. Future close calls will have no effect. 297 * 298 * The underlying FSDataOutputStream is not closed. 299 */ 300 public void close() throws IOException { 301 if ((state == State.CLOSED)) { 302 return; 303 } 304 try { 305 // First try the normal finish. 306 // Terminate upon the first Exception. 307 if (errorCount == 0) { 308 if (state != State.READY) { 309 throw new IllegalStateException( 310 "Cannot close TFile in the middle of key-value insertion."); 311 } 312 313 finishDataBlock(true); 314 315 // first, write out data:TFile.meta 316 BlockAppender outMeta = 317 writerBCF 318 .prepareMetaBlock(TFileMeta.BLOCK_NAME, COMPRESSION_NONE); 319 try { 320 tfileMeta.write(outMeta); 321 } finally { 322 outMeta.close(); 323 } 324 325 // second, write out data:TFile.index 326 BlockAppender outIndex = 327 writerBCF.prepareMetaBlock(TFileIndex.BLOCK_NAME); 328 try { 329 tfileIndex.write(outIndex); 330 } finally { 331 outIndex.close(); 332 } 333 334 writerBCF.close(); 335 } 336 } finally { 337 IOUtils.cleanup(LOG, blkAppender, writerBCF); 338 blkAppender = null; 339 writerBCF = null; 340 state = State.CLOSED; 341 } 342 } 343 344 /** 345 * Adding a new key-value pair to the TFile. This is synonymous to 346 * append(key, 0, key.length, value, 0, value.length) 347 * 348 * @param key 349 * Buffer for key. 350 * @param value 351 * Buffer for value. 352 * @throws IOException 353 */ 354 public void append(byte[] key, byte[] value) throws IOException { 355 append(key, 0, key.length, value, 0, value.length); 356 } 357 358 /** 359 * Adding a new key-value pair to TFile. 360 * 361 * @param key 362 * buffer for key. 363 * @param koff 364 * offset in key buffer. 365 * @param klen 366 * length of key. 367 * @param value 368 * buffer for value. 369 * @param voff 370 * offset in value buffer. 371 * @param vlen 372 * length of value. 373 * @throws IOException 374 * Upon IO errors. 375 * <p> 376 * If an exception is thrown, the TFile will be in an inconsistent 377 * state. The only legitimate call after that would be close 378 */ 379 public void append(byte[] key, int koff, int klen, byte[] value, int voff, 380 int vlen) throws IOException { 381 if ((koff | klen | (koff + klen) | (key.length - (koff + klen))) < 0) { 382 throw new IndexOutOfBoundsException( 383 "Bad key buffer offset-length combination."); 384 } 385 386 if ((voff | vlen | (voff + vlen) | (value.length - (voff + vlen))) < 0) { 387 throw new IndexOutOfBoundsException( 388 "Bad value buffer offset-length combination."); 389 } 390 391 try { 392 DataOutputStream dosKey = prepareAppendKey(klen); 393 try { 394 ++errorCount; 395 dosKey.write(key, koff, klen); 396 --errorCount; 397 } finally { 398 dosKey.close(); 399 } 400 401 DataOutputStream dosValue = prepareAppendValue(vlen); 402 try { 403 ++errorCount; 404 dosValue.write(value, voff, vlen); 405 --errorCount; 406 } finally { 407 dosValue.close(); 408 } 409 } finally { 410 state = State.READY; 411 } 412 } 413 414 /** 415 * Helper class to register key after close call on key append stream. 416 */ 417 private class KeyRegister extends DataOutputStream { 418 private final int expectedLength; 419 private boolean closed = false; 420 421 public KeyRegister(int len) { 422 super(currentKeyBufferOS); 423 if (len >= 0) { 424 currentKeyBufferOS.reset(len); 425 } else { 426 currentKeyBufferOS.reset(); 427 } 428 expectedLength = len; 429 } 430 431 @Override 432 public void close() throws IOException { 433 if (closed == true) { 434 return; 435 } 436 437 try { 438 ++errorCount; 439 byte[] key = currentKeyBufferOS.getBuffer(); 440 int len = currentKeyBufferOS.size(); 441 /** 442 * verify length. 443 */ 444 if (expectedLength >= 0 && expectedLength != len) { 445 throw new IOException("Incorrect key length: expected=" 446 + expectedLength + " actual=" + len); 447 } 448 449 Utils.writeVInt(blkAppender, len); 450 blkAppender.write(key, 0, len); 451 if (tfileIndex.getFirstKey() == null) { 452 tfileIndex.setFirstKey(key, 0, len); 453 } 454 455 if (tfileMeta.isSorted() && tfileMeta.getRecordCount()>0) { 456 byte[] lastKey = lastKeyBufferOS.getBuffer(); 457 int lastLen = lastKeyBufferOS.size(); 458 if (tfileMeta.getComparator().compare(key, 0, len, lastKey, 0, 459 lastLen) < 0) { 460 throw new IOException("Keys are not added in sorted order"); 461 } 462 } 463 464 BoundedByteArrayOutputStream tmp = currentKeyBufferOS; 465 currentKeyBufferOS = lastKeyBufferOS; 466 lastKeyBufferOS = tmp; 467 --errorCount; 468 } finally { 469 closed = true; 470 state = State.END_KEY; 471 } 472 } 473 } 474 475 /** 476 * Helper class to register value after close call on value append stream. 477 */ 478 private class ValueRegister extends DataOutputStream { 479 private boolean closed = false; 480 481 public ValueRegister(OutputStream os) { 482 super(os); 483 } 484 485 // Avoiding flushing call to down stream. 486 @Override 487 public void flush() { 488 // do nothing 489 } 490 491 @Override 492 public void close() throws IOException { 493 if (closed == true) { 494 return; 495 } 496 497 try { 498 ++errorCount; 499 super.close(); 500 blkRecordCount++; 501 // bump up the total record count in the whole file 502 tfileMeta.incRecordCount(); 503 finishDataBlock(false); 504 --errorCount; 505 } finally { 506 closed = true; 507 state = State.READY; 508 } 509 } 510 } 511 512 /** 513 * Obtain an output stream for writing a key into TFile. This may only be 514 * called when there is no active Key appending stream or value appending 515 * stream. 516 * 517 * @param length 518 * The expected length of the key. If length of the key is not 519 * known, set length = -1. Otherwise, the application must write 520 * exactly as many bytes as specified here before calling close on 521 * the returned output stream. 522 * @return The key appending output stream. 523 * @throws IOException 524 * 525 */ 526 public DataOutputStream prepareAppendKey(int length) throws IOException { 527 if (state != State.READY) { 528 throw new IllegalStateException("Incorrect state to start a new key: " 529 + state.name()); 530 } 531 532 initDataBlock(); 533 DataOutputStream ret = new KeyRegister(length); 534 state = State.IN_KEY; 535 return ret; 536 } 537 538 /** 539 * Obtain an output stream for writing a value into TFile. This may only be 540 * called right after a key appending operation (the key append stream must 541 * be closed). 542 * 543 * @param length 544 * The expected length of the value. If length of the value is not 545 * known, set length = -1. Otherwise, the application must write 546 * exactly as many bytes as specified here before calling close on 547 * the returned output stream. Advertising the value size up-front 548 * guarantees that the value is encoded in one chunk, and avoids 549 * intermediate chunk buffering. 550 * @throws IOException 551 * 552 */ 553 public DataOutputStream prepareAppendValue(int length) throws IOException { 554 if (state != State.END_KEY) { 555 throw new IllegalStateException( 556 "Incorrect state to start a new value: " + state.name()); 557 } 558 559 DataOutputStream ret; 560 561 // unknown length 562 if (length < 0) { 563 if (valueBuffer == null) { 564 valueBuffer = new byte[getChunkBufferSize(conf)]; 565 } 566 ret = new ValueRegister(new ChunkEncoder(blkAppender, valueBuffer)); 567 } else { 568 ret = 569 new ValueRegister(new Chunk.SingleChunkEncoder(blkAppender, length)); 570 } 571 572 state = State.IN_VALUE; 573 return ret; 574 } 575 576 /** 577 * Obtain an output stream for creating a meta block. This function may not 578 * be called when there is a key append stream or value append stream 579 * active. No more key-value insertion is allowed after a meta data block 580 * has been added to TFile. 581 * 582 * @param name 583 * Name of the meta block. 584 * @param compressName 585 * Name of the compression algorithm to be used. Must be one of the 586 * strings returned by 587 * {@link TFile#getSupportedCompressionAlgorithms()}. 588 * @return A DataOutputStream that can be used to write Meta Block data. 589 * Closing the stream would signal the ending of the block. 590 * @throws IOException 591 * @throws MetaBlockAlreadyExists 592 * the Meta Block with the same name already exists. 593 */ 594 public DataOutputStream prepareMetaBlock(String name, String compressName) 595 throws IOException, MetaBlockAlreadyExists { 596 if (state != State.READY) { 597 throw new IllegalStateException( 598 "Incorrect state to start a Meta Block: " + state.name()); 599 } 600 601 finishDataBlock(true); 602 DataOutputStream outputStream = 603 writerBCF.prepareMetaBlock(name, compressName); 604 return outputStream; 605 } 606 607 /** 608 * Obtain an output stream for creating a meta block. This function may not 609 * be called when there is a key append stream or value append stream 610 * active. No more key-value insertion is allowed after a meta data block 611 * has been added to TFile. Data will be compressed using the default 612 * compressor as defined in Writer's constructor. 613 * 614 * @param name 615 * Name of the meta block. 616 * @return A DataOutputStream that can be used to write Meta Block data. 617 * Closing the stream would signal the ending of the block. 618 * @throws IOException 619 * @throws MetaBlockAlreadyExists 620 * the Meta Block with the same name already exists. 621 */ 622 public DataOutputStream prepareMetaBlock(String name) throws IOException, 623 MetaBlockAlreadyExists { 624 if (state != State.READY) { 625 throw new IllegalStateException( 626 "Incorrect state to start a Meta Block: " + state.name()); 627 } 628 629 finishDataBlock(true); 630 return writerBCF.prepareMetaBlock(name); 631 } 632 633 /** 634 * Check if we need to start a new data block. 635 * 636 * @throws IOException 637 */ 638 private void initDataBlock() throws IOException { 639 // for each new block, get a new appender 640 if (blkAppender == null) { 641 blkAppender = writerBCF.prepareDataBlock(); 642 } 643 } 644 645 /** 646 * Close the current data block if necessary. 647 * 648 * @param bForceFinish 649 * Force the closure regardless of the block size. 650 * @throws IOException 651 */ 652 void finishDataBlock(boolean bForceFinish) throws IOException { 653 if (blkAppender == null) { 654 return; 655 } 656 657 // exceeded the size limit, do the compression and finish the block 658 if (bForceFinish || blkAppender.getCompressedSize() >= sizeMinBlock) { 659 // keep tracks of the last key of each data block, no padding 660 // for now 661 TFileIndexEntry keyLast = 662 new TFileIndexEntry(lastKeyBufferOS.getBuffer(), 0, lastKeyBufferOS 663 .size(), blkRecordCount); 664 tfileIndex.addEntry(keyLast); 665 // close the appender 666 blkAppender.close(); 667 blkAppender = null; 668 blkRecordCount = 0; 669 } 670 } 671 } 672 673 /** 674 * TFile Reader. Users may only read TFiles by creating TFile.Reader.Scanner. 675 * objects. A scanner may scan the whole TFile ({@link Reader#createScanner()} 676 * ) , a portion of TFile based on byte offsets ( 677 * {@link Reader#createScannerByByteRange(long, long)}), or a portion of TFile with keys 678 * fall in a certain key range (for sorted TFile only, 679 * {@link Reader#createScannerByKey(byte[], byte[])} or 680 * {@link Reader#createScannerByKey(RawComparable, RawComparable)}). 681 */ 682 @InterfaceStability.Evolving 683 public static class Reader implements Closeable { 684 // The underlying BCFile reader. 685 final BCFile.Reader readerBCF; 686 687 // TFile index, it is loaded lazily. 688 TFileIndex tfileIndex = null; 689 final TFileMeta tfileMeta; 690 final BytesComparator comparator; 691 692 // global begin and end locations. 693 private final Location begin; 694 private final Location end; 695 696 /** 697 * Location representing a virtual position in the TFile. 698 */ 699 static final class Location implements Comparable<Location>, Cloneable { 700 private int blockIndex; 701 // distance/offset from the beginning of the block 702 private long recordIndex; 703 704 Location(int blockIndex, long recordIndex) { 705 set(blockIndex, recordIndex); 706 } 707 708 void incRecordIndex() { 709 ++recordIndex; 710 } 711 712 Location(Location other) { 713 set(other); 714 } 715 716 int getBlockIndex() { 717 return blockIndex; 718 } 719 720 long getRecordIndex() { 721 return recordIndex; 722 } 723 724 void set(int blockIndex, long recordIndex) { 725 if ((blockIndex | recordIndex) < 0) { 726 throw new IllegalArgumentException( 727 "Illegal parameter for BlockLocation."); 728 } 729 this.blockIndex = blockIndex; 730 this.recordIndex = recordIndex; 731 } 732 733 void set(Location other) { 734 set(other.blockIndex, other.recordIndex); 735 } 736 737 /** 738 * @see java.lang.Comparable#compareTo(java.lang.Object) 739 */ 740 @Override 741 public int compareTo(Location other) { 742 return compareTo(other.blockIndex, other.recordIndex); 743 } 744 745 int compareTo(int bid, long rid) { 746 if (this.blockIndex == bid) { 747 long ret = this.recordIndex - rid; 748 if (ret > 0) return 1; 749 if (ret < 0) return -1; 750 return 0; 751 } 752 return this.blockIndex - bid; 753 } 754 755 /** 756 * @see java.lang.Object#clone() 757 */ 758 @Override 759 protected Location clone() { 760 return new Location(blockIndex, recordIndex); 761 } 762 763 /** 764 * @see java.lang.Object#hashCode() 765 */ 766 @Override 767 public int hashCode() { 768 final int prime = 31; 769 int result = prime + blockIndex; 770 result = (int) (prime * result + recordIndex); 771 return result; 772 } 773 774 /** 775 * @see java.lang.Object#equals(java.lang.Object) 776 */ 777 @Override 778 public boolean equals(Object obj) { 779 if (this == obj) return true; 780 if (obj == null) return false; 781 if (getClass() != obj.getClass()) return false; 782 Location other = (Location) obj; 783 if (blockIndex != other.blockIndex) return false; 784 if (recordIndex != other.recordIndex) return false; 785 return true; 786 } 787 } 788 789 /** 790 * Constructor 791 * 792 * @param fsdis 793 * FS input stream of the TFile. 794 * @param fileLength 795 * The length of TFile. This is required because we have no easy 796 * way of knowing the actual size of the input file through the 797 * File input stream. 798 * @param conf 799 * @throws IOException 800 */ 801 public Reader(FSDataInputStream fsdis, long fileLength, Configuration conf) 802 throws IOException { 803 readerBCF = new BCFile.Reader(fsdis, fileLength, conf); 804 805 // first, read TFile meta 806 BlockReader brMeta = readerBCF.getMetaBlock(TFileMeta.BLOCK_NAME); 807 try { 808 tfileMeta = new TFileMeta(brMeta); 809 } finally { 810 brMeta.close(); 811 } 812 813 comparator = tfileMeta.getComparator(); 814 // Set begin and end locations. 815 begin = new Location(0, 0); 816 end = new Location(readerBCF.getBlockCount(), 0); 817 } 818 819 /** 820 * Close the reader. The state of the Reader object is undefined after 821 * close. Calling close() for multiple times has no effect. 822 */ 823 public void close() throws IOException { 824 readerBCF.close(); 825 } 826 827 /** 828 * Get the begin location of the TFile. 829 * 830 * @return If TFile is not empty, the location of the first key-value pair. 831 * Otherwise, it returns end(). 832 */ 833 Location begin() { 834 return begin; 835 } 836 837 /** 838 * Get the end location of the TFile. 839 * 840 * @return The location right after the last key-value pair in TFile. 841 */ 842 Location end() { 843 return end; 844 } 845 846 /** 847 * Get the string representation of the comparator. 848 * 849 * @return If the TFile is not sorted by keys, an empty string will be 850 * returned. Otherwise, the actual comparator string that is 851 * provided during the TFile creation time will be returned. 852 */ 853 public String getComparatorName() { 854 return tfileMeta.getComparatorString(); 855 } 856 857 /** 858 * Is the TFile sorted? 859 * 860 * @return true if TFile is sorted. 861 */ 862 public boolean isSorted() { 863 return tfileMeta.isSorted(); 864 } 865 866 /** 867 * Get the number of key-value pair entries in TFile. 868 * 869 * @return the number of key-value pairs in TFile 870 */ 871 public long getEntryCount() { 872 return tfileMeta.getRecordCount(); 873 } 874 875 /** 876 * Lazily loading the TFile index. 877 * 878 * @throws IOException 879 */ 880 synchronized void checkTFileDataIndex() throws IOException { 881 if (tfileIndex == null) { 882 BlockReader brIndex = readerBCF.getMetaBlock(TFileIndex.BLOCK_NAME); 883 try { 884 tfileIndex = 885 new TFileIndex(readerBCF.getBlockCount(), brIndex, tfileMeta 886 .getComparator()); 887 } finally { 888 brIndex.close(); 889 } 890 } 891 } 892 893 /** 894 * Get the first key in the TFile. 895 * 896 * @return The first key in the TFile. 897 * @throws IOException 898 */ 899 public RawComparable getFirstKey() throws IOException { 900 checkTFileDataIndex(); 901 return tfileIndex.getFirstKey(); 902 } 903 904 /** 905 * Get the last key in the TFile. 906 * 907 * @return The last key in the TFile. 908 * @throws IOException 909 */ 910 public RawComparable getLastKey() throws IOException { 911 checkTFileDataIndex(); 912 return tfileIndex.getLastKey(); 913 } 914 915 /** 916 * Get a Comparator object to compare Entries. It is useful when you want 917 * stores the entries in a collection (such as PriorityQueue) and perform 918 * sorting or comparison among entries based on the keys without copying out 919 * the key. 920 * 921 * @return An Entry Comparator.. 922 */ 923 public Comparator<Scanner.Entry> getEntryComparator() { 924 if (!isSorted()) { 925 throw new RuntimeException( 926 "Entries are not comparable for unsorted TFiles"); 927 } 928 929 return new Comparator<Scanner.Entry>() { 930 /** 931 * Provide a customized comparator for Entries. This is useful if we 932 * have a collection of Entry objects. However, if the Entry objects 933 * come from different TFiles, users must ensure that those TFiles share 934 * the same RawComparator. 935 */ 936 @Override 937 public int compare(Scanner.Entry o1, Scanner.Entry o2) { 938 return comparator.compare(o1.getKeyBuffer(), 0, o1.getKeyLength(), o2 939 .getKeyBuffer(), 0, o2.getKeyLength()); 940 } 941 }; 942 } 943 944 /** 945 * Get an instance of the RawComparator that is constructed based on the 946 * string comparator representation. 947 * 948 * @return a Comparator that can compare RawComparable's. 949 */ 950 public Comparator<RawComparable> getComparator() { 951 return comparator; 952 } 953 954 /** 955 * Stream access to a meta block.`` 956 * 957 * @param name 958 * The name of the meta block. 959 * @return The input stream. 960 * @throws IOException 961 * on I/O error. 962 * @throws MetaBlockDoesNotExist 963 * If the meta block with the name does not exist. 964 */ 965 public DataInputStream getMetaBlock(String name) throws IOException, 966 MetaBlockDoesNotExist { 967 return readerBCF.getMetaBlock(name); 968 } 969 970 /** 971 * if greater is true then returns the beginning location of the block 972 * containing the key strictly greater than input key. if greater is false 973 * then returns the beginning location of the block greater than equal to 974 * the input key 975 * 976 * @param key 977 * the input key 978 * @param greater 979 * boolean flag 980 * @return 981 * @throws IOException 982 */ 983 Location getBlockContainsKey(RawComparable key, boolean greater) 984 throws IOException { 985 if (!isSorted()) { 986 throw new RuntimeException("Seeking in unsorted TFile"); 987 } 988 checkTFileDataIndex(); 989 int blkIndex = 990 (greater) ? tfileIndex.upperBound(key) : tfileIndex.lowerBound(key); 991 if (blkIndex < 0) return end; 992 return new Location(blkIndex, 0); 993 } 994 995 Location getLocationByRecordNum(long recNum) throws IOException { 996 checkTFileDataIndex(); 997 return tfileIndex.getLocationByRecordNum(recNum); 998 } 999 1000 long getRecordNumByLocation(Location location) throws IOException { 1001 checkTFileDataIndex(); 1002 return tfileIndex.getRecordNumByLocation(location); 1003 } 1004 1005 int compareKeys(byte[] a, int o1, int l1, byte[] b, int o2, int l2) { 1006 if (!isSorted()) { 1007 throw new RuntimeException("Cannot compare keys for unsorted TFiles."); 1008 } 1009 return comparator.compare(a, o1, l1, b, o2, l2); 1010 } 1011 1012 int compareKeys(RawComparable a, RawComparable b) { 1013 if (!isSorted()) { 1014 throw new RuntimeException("Cannot compare keys for unsorted TFiles."); 1015 } 1016 return comparator.compare(a, b); 1017 } 1018 1019 /** 1020 * Get the location pointing to the beginning of the first key-value pair in 1021 * a compressed block whose byte offset in the TFile is greater than or 1022 * equal to the specified offset. 1023 * 1024 * @param offset 1025 * the user supplied offset. 1026 * @return the location to the corresponding entry; or end() if no such 1027 * entry exists. 1028 */ 1029 Location getLocationNear(long offset) { 1030 int blockIndex = readerBCF.getBlockIndexNear(offset); 1031 if (blockIndex == -1) return end; 1032 return new Location(blockIndex, 0); 1033 } 1034 1035 /** 1036 * Get the RecordNum for the first key-value pair in a compressed block 1037 * whose byte offset in the TFile is greater than or equal to the specified 1038 * offset. 1039 * 1040 * @param offset 1041 * the user supplied offset. 1042 * @return the RecordNum to the corresponding entry. If no such entry 1043 * exists, it returns the total entry count. 1044 * @throws IOException 1045 */ 1046 public long getRecordNumNear(long offset) throws IOException { 1047 return getRecordNumByLocation(getLocationNear(offset)); 1048 } 1049 1050 /** 1051 * Get a sample key that is within a block whose starting offset is greater 1052 * than or equal to the specified offset. 1053 * 1054 * @param offset 1055 * The file offset. 1056 * @return the key that fits the requirement; or null if no such key exists 1057 * (which could happen if the offset is close to the end of the 1058 * TFile). 1059 * @throws IOException 1060 */ 1061 public RawComparable getKeyNear(long offset) throws IOException { 1062 int blockIndex = readerBCF.getBlockIndexNear(offset); 1063 if (blockIndex == -1) return null; 1064 checkTFileDataIndex(); 1065 return new ByteArray(tfileIndex.getEntry(blockIndex).key); 1066 } 1067 1068 /** 1069 * Get a scanner than can scan the whole TFile. 1070 * 1071 * @return The scanner object. A valid Scanner is always returned even if 1072 * the TFile is empty. 1073 * @throws IOException 1074 */ 1075 public Scanner createScanner() throws IOException { 1076 return new Scanner(this, begin, end); 1077 } 1078 1079 /** 1080 * Get a scanner that covers a portion of TFile based on byte offsets. 1081 * 1082 * @param offset 1083 * The beginning byte offset in the TFile. 1084 * @param length 1085 * The length of the region. 1086 * @return The actual coverage of the returned scanner tries to match the 1087 * specified byte-region but always round up to the compression 1088 * block boundaries. It is possible that the returned scanner 1089 * contains zero key-value pairs even if length is positive. 1090 * @throws IOException 1091 */ 1092 public Scanner createScannerByByteRange(long offset, long length) throws IOException { 1093 return new Scanner(this, offset, offset + length); 1094 } 1095 1096 /** 1097 * Get a scanner that covers a portion of TFile based on keys. 1098 * 1099 * @param beginKey 1100 * Begin key of the scan (inclusive). If null, scan from the first 1101 * key-value entry of the TFile. 1102 * @param endKey 1103 * End key of the scan (exclusive). If null, scan up to the last 1104 * key-value entry of the TFile. 1105 * @return The actual coverage of the returned scanner will cover all keys 1106 * greater than or equal to the beginKey and less than the endKey. 1107 * @throws IOException 1108 * 1109 * @deprecated Use {@link #createScannerByKey(byte[], byte[])} instead. 1110 */ 1111 @Deprecated 1112 public Scanner createScanner(byte[] beginKey, byte[] endKey) 1113 throws IOException { 1114 return createScannerByKey(beginKey, endKey); 1115 } 1116 1117 /** 1118 * Get a scanner that covers a portion of TFile based on keys. 1119 * 1120 * @param beginKey 1121 * Begin key of the scan (inclusive). If null, scan from the first 1122 * key-value entry of the TFile. 1123 * @param endKey 1124 * End key of the scan (exclusive). If null, scan up to the last 1125 * key-value entry of the TFile. 1126 * @return The actual coverage of the returned scanner will cover all keys 1127 * greater than or equal to the beginKey and less than the endKey. 1128 * @throws IOException 1129 */ 1130 public Scanner createScannerByKey(byte[] beginKey, byte[] endKey) 1131 throws IOException { 1132 return createScannerByKey((beginKey == null) ? null : new ByteArray(beginKey, 1133 0, beginKey.length), (endKey == null) ? null : new ByteArray(endKey, 1134 0, endKey.length)); 1135 } 1136 1137 /** 1138 * Get a scanner that covers a specific key range. 1139 * 1140 * @param beginKey 1141 * Begin key of the scan (inclusive). If null, scan from the first 1142 * key-value entry of the TFile. 1143 * @param endKey 1144 * End key of the scan (exclusive). If null, scan up to the last 1145 * key-value entry of the TFile. 1146 * @return The actual coverage of the returned scanner will cover all keys 1147 * greater than or equal to the beginKey and less than the endKey. 1148 * @throws IOException 1149 * 1150 * @deprecated Use {@link #createScannerByKey(RawComparable, RawComparable)} 1151 * instead. 1152 */ 1153 @Deprecated 1154 public Scanner createScanner(RawComparable beginKey, RawComparable endKey) 1155 throws IOException { 1156 return createScannerByKey(beginKey, endKey); 1157 } 1158 1159 /** 1160 * Get a scanner that covers a specific key range. 1161 * 1162 * @param beginKey 1163 * Begin key of the scan (inclusive). If null, scan from the first 1164 * key-value entry of the TFile. 1165 * @param endKey 1166 * End key of the scan (exclusive). If null, scan up to the last 1167 * key-value entry of the TFile. 1168 * @return The actual coverage of the returned scanner will cover all keys 1169 * greater than or equal to the beginKey and less than the endKey. 1170 * @throws IOException 1171 */ 1172 public Scanner createScannerByKey(RawComparable beginKey, RawComparable endKey) 1173 throws IOException { 1174 if ((beginKey != null) && (endKey != null) 1175 && (compareKeys(beginKey, endKey) >= 0)) { 1176 return new Scanner(this, beginKey, beginKey); 1177 } 1178 return new Scanner(this, beginKey, endKey); 1179 } 1180 1181 /** 1182 * Create a scanner that covers a range of records. 1183 * 1184 * @param beginRecNum 1185 * The RecordNum for the first record (inclusive). 1186 * @param endRecNum 1187 * The RecordNum for the last record (exclusive). To scan the whole 1188 * file, either specify endRecNum==-1 or endRecNum==getEntryCount(). 1189 * @return The TFile scanner that covers the specified range of records. 1190 * @throws IOException 1191 */ 1192 public Scanner createScannerByRecordNum(long beginRecNum, long endRecNum) 1193 throws IOException { 1194 if (beginRecNum < 0) beginRecNum = 0; 1195 if (endRecNum < 0 || endRecNum > getEntryCount()) { 1196 endRecNum = getEntryCount(); 1197 } 1198 return new Scanner(this, getLocationByRecordNum(beginRecNum), 1199 getLocationByRecordNum(endRecNum)); 1200 } 1201 1202 /** 1203 * The TFile Scanner. The Scanner has an implicit cursor, which, upon 1204 * creation, points to the first key-value pair in the scan range. If the 1205 * scan range is empty, the cursor will point to the end of the scan range. 1206 * <p> 1207 * Use {@link Scanner#atEnd()} to test whether the cursor is at the end 1208 * location of the scanner. 1209 * <p> 1210 * Use {@link Scanner#advance()} to move the cursor to the next key-value 1211 * pair (or end if none exists). Use seekTo methods ( 1212 * {@link Scanner#seekTo(byte[])} or 1213 * {@link Scanner#seekTo(byte[], int, int)}) to seek to any arbitrary 1214 * location in the covered range (including backward seeking). Use 1215 * {@link Scanner#rewind()} to seek back to the beginning of the scanner. 1216 * Use {@link Scanner#seekToEnd()} to seek to the end of the scanner. 1217 * <p> 1218 * Actual keys and values may be obtained through {@link Scanner.Entry} 1219 * object, which is obtained through {@link Scanner#entry()}. 1220 */ 1221 public static class Scanner implements Closeable { 1222 // The underlying TFile reader. 1223 final Reader reader; 1224 // current block (null if reaching end) 1225 private BlockReader blkReader; 1226 1227 Location beginLocation; 1228 Location endLocation; 1229 Location currentLocation; 1230 1231 // flag to ensure value is only examined once. 1232 boolean valueChecked = false; 1233 // reusable buffer for keys. 1234 final byte[] keyBuffer; 1235 // length of key, -1 means key is invalid. 1236 int klen = -1; 1237 1238 static final int MAX_VAL_TRANSFER_BUF_SIZE = 128 * 1024; 1239 BytesWritable valTransferBuffer; 1240 1241 DataInputBuffer keyDataInputStream; 1242 ChunkDecoder valueBufferInputStream; 1243 DataInputStream valueDataInputStream; 1244 // vlen == -1 if unknown. 1245 int vlen; 1246 1247 /** 1248 * Constructor 1249 * 1250 * @param reader 1251 * The TFile reader object. 1252 * @param offBegin 1253 * Begin byte-offset of the scan. 1254 * @param offEnd 1255 * End byte-offset of the scan. 1256 * @throws IOException 1257 * 1258 * The offsets will be rounded to the beginning of a compressed 1259 * block whose offset is greater than or equal to the specified 1260 * offset. 1261 */ 1262 protected Scanner(Reader reader, long offBegin, long offEnd) 1263 throws IOException { 1264 this(reader, reader.getLocationNear(offBegin), reader 1265 .getLocationNear(offEnd)); 1266 } 1267 1268 /** 1269 * Constructor 1270 * 1271 * @param reader 1272 * The TFile reader object. 1273 * @param begin 1274 * Begin location of the scan. 1275 * @param end 1276 * End location of the scan. 1277 * @throws IOException 1278 */ 1279 Scanner(Reader reader, Location begin, Location end) throws IOException { 1280 this.reader = reader; 1281 // ensure the TFile index is loaded throughout the life of scanner. 1282 reader.checkTFileDataIndex(); 1283 beginLocation = begin; 1284 endLocation = end; 1285 1286 valTransferBuffer = new BytesWritable(); 1287 // TODO: remember the longest key in a TFile, and use it to replace 1288 // MAX_KEY_SIZE. 1289 keyBuffer = new byte[MAX_KEY_SIZE]; 1290 keyDataInputStream = new DataInputBuffer(); 1291 valueBufferInputStream = new ChunkDecoder(); 1292 valueDataInputStream = new DataInputStream(valueBufferInputStream); 1293 1294 if (beginLocation.compareTo(endLocation) >= 0) { 1295 currentLocation = new Location(endLocation); 1296 } else { 1297 currentLocation = new Location(0, 0); 1298 initBlock(beginLocation.getBlockIndex()); 1299 inBlockAdvance(beginLocation.getRecordIndex()); 1300 } 1301 } 1302 1303 /** 1304 * Constructor 1305 * 1306 * @param reader 1307 * The TFile reader object. 1308 * @param beginKey 1309 * Begin key of the scan. If null, scan from the first <K,V> 1310 * entry of the TFile. 1311 * @param endKey 1312 * End key of the scan. If null, scan up to the last <K, V> entry 1313 * of the TFile. 1314 * @throws IOException 1315 */ 1316 protected Scanner(Reader reader, RawComparable beginKey, 1317 RawComparable endKey) throws IOException { 1318 this(reader, (beginKey == null) ? reader.begin() : reader 1319 .getBlockContainsKey(beginKey, false), reader.end()); 1320 if (beginKey != null) { 1321 inBlockAdvance(beginKey, false); 1322 beginLocation.set(currentLocation); 1323 } 1324 if (endKey != null) { 1325 seekTo(endKey, false); 1326 endLocation.set(currentLocation); 1327 seekTo(beginLocation); 1328 } 1329 } 1330 1331 /** 1332 * Move the cursor to the first entry whose key is greater than or equal 1333 * to the input key. Synonymous to seekTo(key, 0, key.length). The entry 1334 * returned by the previous entry() call will be invalid. 1335 * 1336 * @param key 1337 * The input key 1338 * @return true if we find an equal key. 1339 * @throws IOException 1340 */ 1341 public boolean seekTo(byte[] key) throws IOException { 1342 return seekTo(key, 0, key.length); 1343 } 1344 1345 /** 1346 * Move the cursor to the first entry whose key is greater than or equal 1347 * to the input key. The entry returned by the previous entry() call will 1348 * be invalid. 1349 * 1350 * @param key 1351 * The input key 1352 * @param keyOffset 1353 * offset in the key buffer. 1354 * @param keyLen 1355 * key buffer length. 1356 * @return true if we find an equal key; false otherwise. 1357 * @throws IOException 1358 */ 1359 public boolean seekTo(byte[] key, int keyOffset, int keyLen) 1360 throws IOException { 1361 return seekTo(new ByteArray(key, keyOffset, keyLen), false); 1362 } 1363 1364 private boolean seekTo(RawComparable key, boolean beyond) 1365 throws IOException { 1366 Location l = reader.getBlockContainsKey(key, beyond); 1367 if (l.compareTo(beginLocation) < 0) { 1368 l = beginLocation; 1369 } else if (l.compareTo(endLocation) >= 0) { 1370 seekTo(endLocation); 1371 return false; 1372 } 1373 1374 // check if what we are seeking is in the later part of the current 1375 // block. 1376 if (atEnd() || (l.getBlockIndex() != currentLocation.getBlockIndex()) 1377 || (compareCursorKeyTo(key) >= 0)) { 1378 // sorry, we must seek to a different location first. 1379 seekTo(l); 1380 } 1381 1382 return inBlockAdvance(key, beyond); 1383 } 1384 1385 /** 1386 * Move the cursor to the new location. The entry returned by the previous 1387 * entry() call will be invalid. 1388 * 1389 * @param l 1390 * new cursor location. It must fall between the begin and end 1391 * location of the scanner. 1392 * @throws IOException 1393 */ 1394 private void seekTo(Location l) throws IOException { 1395 if (l.compareTo(beginLocation) < 0) { 1396 throw new IllegalArgumentException( 1397 "Attempt to seek before the begin location."); 1398 } 1399 1400 if (l.compareTo(endLocation) > 0) { 1401 throw new IllegalArgumentException( 1402 "Attempt to seek after the end location."); 1403 } 1404 1405 if (l.compareTo(endLocation) == 0) { 1406 parkCursorAtEnd(); 1407 return; 1408 } 1409 1410 if (l.getBlockIndex() != currentLocation.getBlockIndex()) { 1411 // going to a totally different block 1412 initBlock(l.getBlockIndex()); 1413 } else { 1414 if (valueChecked) { 1415 // may temporarily go beyond the last record in the block (in which 1416 // case the next if loop will always be true). 1417 inBlockAdvance(1); 1418 } 1419 if (l.getRecordIndex() < currentLocation.getRecordIndex()) { 1420 initBlock(l.getBlockIndex()); 1421 } 1422 } 1423 1424 inBlockAdvance(l.getRecordIndex() - currentLocation.getRecordIndex()); 1425 1426 return; 1427 } 1428 1429 /** 1430 * Rewind to the first entry in the scanner. The entry returned by the 1431 * previous entry() call will be invalid. 1432 * 1433 * @throws IOException 1434 */ 1435 public void rewind() throws IOException { 1436 seekTo(beginLocation); 1437 } 1438 1439 /** 1440 * Seek to the end of the scanner. The entry returned by the previous 1441 * entry() call will be invalid. 1442 * 1443 * @throws IOException 1444 */ 1445 public void seekToEnd() throws IOException { 1446 parkCursorAtEnd(); 1447 } 1448 1449 /** 1450 * Move the cursor to the first entry whose key is greater than or equal 1451 * to the input key. Synonymous to lowerBound(key, 0, key.length). The 1452 * entry returned by the previous entry() call will be invalid. 1453 * 1454 * @param key 1455 * The input key 1456 * @throws IOException 1457 */ 1458 public void lowerBound(byte[] key) throws IOException { 1459 lowerBound(key, 0, key.length); 1460 } 1461 1462 /** 1463 * Move the cursor to the first entry whose key is greater than or equal 1464 * to the input key. The entry returned by the previous entry() call will 1465 * be invalid. 1466 * 1467 * @param key 1468 * The input key 1469 * @param keyOffset 1470 * offset in the key buffer. 1471 * @param keyLen 1472 * key buffer length. 1473 * @throws IOException 1474 */ 1475 public void lowerBound(byte[] key, int keyOffset, int keyLen) 1476 throws IOException { 1477 seekTo(new ByteArray(key, keyOffset, keyLen), false); 1478 } 1479 1480 /** 1481 * Move the cursor to the first entry whose key is strictly greater than 1482 * the input key. Synonymous to upperBound(key, 0, key.length). The entry 1483 * returned by the previous entry() call will be invalid. 1484 * 1485 * @param key 1486 * The input key 1487 * @throws IOException 1488 */ 1489 public void upperBound(byte[] key) throws IOException { 1490 upperBound(key, 0, key.length); 1491 } 1492 1493 /** 1494 * Move the cursor to the first entry whose key is strictly greater than 1495 * the input key. The entry returned by the previous entry() call will be 1496 * invalid. 1497 * 1498 * @param key 1499 * The input key 1500 * @param keyOffset 1501 * offset in the key buffer. 1502 * @param keyLen 1503 * key buffer length. 1504 * @throws IOException 1505 */ 1506 public void upperBound(byte[] key, int keyOffset, int keyLen) 1507 throws IOException { 1508 seekTo(new ByteArray(key, keyOffset, keyLen), true); 1509 } 1510 1511 /** 1512 * Move the cursor to the next key-value pair. The entry returned by the 1513 * previous entry() call will be invalid. 1514 * 1515 * @return true if the cursor successfully moves. False when cursor is 1516 * already at the end location and cannot be advanced. 1517 * @throws IOException 1518 */ 1519 public boolean advance() throws IOException { 1520 if (atEnd()) { 1521 return false; 1522 } 1523 1524 int curBid = currentLocation.getBlockIndex(); 1525 long curRid = currentLocation.getRecordIndex(); 1526 long entriesInBlock = reader.getBlockEntryCount(curBid); 1527 if (curRid + 1 >= entriesInBlock) { 1528 if (endLocation.compareTo(curBid + 1, 0) <= 0) { 1529 // last entry in TFile. 1530 parkCursorAtEnd(); 1531 } else { 1532 // last entry in Block. 1533 initBlock(curBid + 1); 1534 } 1535 } else { 1536 inBlockAdvance(1); 1537 } 1538 return true; 1539 } 1540 1541 /** 1542 * Load a compressed block for reading. Expecting blockIndex is valid. 1543 * 1544 * @throws IOException 1545 */ 1546 private void initBlock(int blockIndex) throws IOException { 1547 klen = -1; 1548 if (blkReader != null) { 1549 try { 1550 blkReader.close(); 1551 } finally { 1552 blkReader = null; 1553 } 1554 } 1555 blkReader = reader.getBlockReader(blockIndex); 1556 currentLocation.set(blockIndex, 0); 1557 } 1558 1559 private void parkCursorAtEnd() throws IOException { 1560 klen = -1; 1561 currentLocation.set(endLocation); 1562 if (blkReader != null) { 1563 try { 1564 blkReader.close(); 1565 } finally { 1566 blkReader = null; 1567 } 1568 } 1569 } 1570 1571 /** 1572 * Close the scanner. Release all resources. The behavior of using the 1573 * scanner after calling close is not defined. The entry returned by the 1574 * previous entry() call will be invalid. 1575 */ 1576 public void close() throws IOException { 1577 parkCursorAtEnd(); 1578 } 1579 1580 /** 1581 * Is cursor at the end location? 1582 * 1583 * @return true if the cursor is at the end location. 1584 */ 1585 public boolean atEnd() { 1586 return (currentLocation.compareTo(endLocation) >= 0); 1587 } 1588 1589 /** 1590 * check whether we have already successfully obtained the key. It also 1591 * initializes the valueInputStream. 1592 */ 1593 void checkKey() throws IOException { 1594 if (klen >= 0) return; 1595 if (atEnd()) { 1596 throw new EOFException("No key-value to read"); 1597 } 1598 klen = -1; 1599 vlen = -1; 1600 valueChecked = false; 1601 1602 klen = Utils.readVInt(blkReader); 1603 blkReader.readFully(keyBuffer, 0, klen); 1604 valueBufferInputStream.reset(blkReader); 1605 if (valueBufferInputStream.isLastChunk()) { 1606 vlen = valueBufferInputStream.getRemain(); 1607 } 1608 } 1609 1610 /** 1611 * Get an entry to access the key and value. 1612 * 1613 * @return The Entry object to access the key and value. 1614 * @throws IOException 1615 */ 1616 public Entry entry() throws IOException { 1617 checkKey(); 1618 return new Entry(); 1619 } 1620 1621 /** 1622 * Get the RecordNum corresponding to the entry pointed by the cursor. 1623 * @return The RecordNum corresponding to the entry pointed by the cursor. 1624 * @throws IOException 1625 */ 1626 public long getRecordNum() throws IOException { 1627 return reader.getRecordNumByLocation(currentLocation); 1628 } 1629 1630 /** 1631 * Internal API. Comparing the key at cursor to user-specified key. 1632 * 1633 * @param other 1634 * user-specified key. 1635 * @return negative if key at cursor is smaller than user key; 0 if equal; 1636 * and positive if key at cursor greater than user key. 1637 * @throws IOException 1638 */ 1639 int compareCursorKeyTo(RawComparable other) throws IOException { 1640 checkKey(); 1641 return reader.compareKeys(keyBuffer, 0, klen, other.buffer(), other 1642 .offset(), other.size()); 1643 } 1644 1645 /** 1646 * Entry to a <Key, Value> pair. 1647 */ 1648 public class Entry implements Comparable<RawComparable> { 1649 /** 1650 * Get the length of the key. 1651 * 1652 * @return the length of the key. 1653 */ 1654 public int getKeyLength() { 1655 return klen; 1656 } 1657 1658 byte[] getKeyBuffer() { 1659 return keyBuffer; 1660 } 1661 1662 /** 1663 * Copy the key and value in one shot into BytesWritables. This is 1664 * equivalent to getKey(key); getValue(value); 1665 * 1666 * @param key 1667 * BytesWritable to hold key. 1668 * @param value 1669 * BytesWritable to hold value 1670 * @throws IOException 1671 */ 1672 public void get(BytesWritable key, BytesWritable value) 1673 throws IOException { 1674 getKey(key); 1675 getValue(value); 1676 } 1677 1678 /** 1679 * Copy the key into BytesWritable. The input BytesWritable will be 1680 * automatically resized to the actual key size. 1681 * 1682 * @param key 1683 * BytesWritable to hold the key. 1684 * @throws IOException 1685 */ 1686 public int getKey(BytesWritable key) throws IOException { 1687 key.setSize(getKeyLength()); 1688 getKey(key.getBytes()); 1689 return key.getLength(); 1690 } 1691 1692 /** 1693 * Copy the value into BytesWritable. The input BytesWritable will be 1694 * automatically resized to the actual value size. The implementation 1695 * directly uses the buffer inside BytesWritable for storing the value. 1696 * The call does not require the value length to be known. 1697 * 1698 * @param value 1699 * @throws IOException 1700 */ 1701 public long getValue(BytesWritable value) throws IOException { 1702 DataInputStream dis = getValueStream(); 1703 int size = 0; 1704 try { 1705 int remain; 1706 while ((remain = valueBufferInputStream.getRemain()) > 0) { 1707 value.setSize(size + remain); 1708 dis.readFully(value.getBytes(), size, remain); 1709 size += remain; 1710 } 1711 return value.getLength(); 1712 } finally { 1713 dis.close(); 1714 } 1715 } 1716 1717 /** 1718 * Writing the key to the output stream. This method avoids copying key 1719 * buffer from Scanner into user buffer, then writing to the output 1720 * stream. 1721 * 1722 * @param out 1723 * The output stream 1724 * @return the length of the key. 1725 * @throws IOException 1726 */ 1727 public int writeKey(OutputStream out) throws IOException { 1728 out.write(keyBuffer, 0, klen); 1729 return klen; 1730 } 1731 1732 /** 1733 * Writing the value to the output stream. This method avoids copying 1734 * value data from Scanner into user buffer, then writing to the output 1735 * stream. It does not require the value length to be known. 1736 * 1737 * @param out 1738 * The output stream 1739 * @return the length of the value 1740 * @throws IOException 1741 */ 1742 public long writeValue(OutputStream out) throws IOException { 1743 DataInputStream dis = getValueStream(); 1744 long size = 0; 1745 try { 1746 int chunkSize; 1747 while ((chunkSize = valueBufferInputStream.getRemain()) > 0) { 1748 chunkSize = Math.min(chunkSize, MAX_VAL_TRANSFER_BUF_SIZE); 1749 valTransferBuffer.setSize(chunkSize); 1750 dis.readFully(valTransferBuffer.getBytes(), 0, chunkSize); 1751 out.write(valTransferBuffer.getBytes(), 0, chunkSize); 1752 size += chunkSize; 1753 } 1754 return size; 1755 } finally { 1756 dis.close(); 1757 } 1758 } 1759 1760 /** 1761 * Copy the key into user supplied buffer. 1762 * 1763 * @param buf 1764 * The buffer supplied by user. The length of the buffer must 1765 * not be shorter than the key length. 1766 * @return The length of the key. 1767 * 1768 * @throws IOException 1769 */ 1770 public int getKey(byte[] buf) throws IOException { 1771 return getKey(buf, 0); 1772 } 1773 1774 /** 1775 * Copy the key into user supplied buffer. 1776 * 1777 * @param buf 1778 * The buffer supplied by user. 1779 * @param offset 1780 * The starting offset of the user buffer where we should copy 1781 * the key into. Requiring the key-length + offset no greater 1782 * than the buffer length. 1783 * @return The length of the key. 1784 * @throws IOException 1785 */ 1786 public int getKey(byte[] buf, int offset) throws IOException { 1787 if ((offset | (buf.length - offset - klen)) < 0) { 1788 throw new IndexOutOfBoundsException( 1789 "Bufer not enough to store the key"); 1790 } 1791 System.arraycopy(keyBuffer, 0, buf, offset, klen); 1792 return klen; 1793 } 1794 1795 /** 1796 * Streaming access to the key. Useful for desrializing the key into 1797 * user objects. 1798 * 1799 * @return The input stream. 1800 */ 1801 public DataInputStream getKeyStream() { 1802 keyDataInputStream.reset(keyBuffer, klen); 1803 return keyDataInputStream; 1804 } 1805 1806 /** 1807 * Get the length of the value. isValueLengthKnown() must be tested 1808 * true. 1809 * 1810 * @return the length of the value. 1811 */ 1812 public int getValueLength() { 1813 if (vlen >= 0) { 1814 return vlen; 1815 } 1816 1817 throw new RuntimeException("Value length unknown."); 1818 } 1819 1820 /** 1821 * Copy value into user-supplied buffer. User supplied buffer must be 1822 * large enough to hold the whole value. The value part of the key-value 1823 * pair pointed by the current cursor is not cached and can only be 1824 * examined once. Calling any of the following functions more than once 1825 * without moving the cursor will result in exception: 1826 * {@link #getValue(byte[])}, {@link #getValue(byte[], int)}, 1827 * {@link #getValueStream}. 1828 * 1829 * @return the length of the value. Does not require 1830 * isValueLengthKnown() to be true. 1831 * @throws IOException 1832 * 1833 */ 1834 public int getValue(byte[] buf) throws IOException { 1835 return getValue(buf, 0); 1836 } 1837 1838 /** 1839 * Copy value into user-supplied buffer. User supplied buffer must be 1840 * large enough to hold the whole value (starting from the offset). The 1841 * value part of the key-value pair pointed by the current cursor is not 1842 * cached and can only be examined once. Calling any of the following 1843 * functions more than once without moving the cursor will result in 1844 * exception: {@link #getValue(byte[])}, {@link #getValue(byte[], int)}, 1845 * {@link #getValueStream}. 1846 * 1847 * @return the length of the value. Does not require 1848 * isValueLengthKnown() to be true. 1849 * @throws IOException 1850 */ 1851 public int getValue(byte[] buf, int offset) throws IOException { 1852 DataInputStream dis = getValueStream(); 1853 try { 1854 if (isValueLengthKnown()) { 1855 if ((offset | (buf.length - offset - vlen)) < 0) { 1856 throw new IndexOutOfBoundsException( 1857 "Buffer too small to hold value"); 1858 } 1859 dis.readFully(buf, offset, vlen); 1860 return vlen; 1861 } 1862 1863 int nextOffset = offset; 1864 while (nextOffset < buf.length) { 1865 int n = dis.read(buf, nextOffset, buf.length - nextOffset); 1866 if (n < 0) { 1867 break; 1868 } 1869 nextOffset += n; 1870 } 1871 if (dis.read() >= 0) { 1872 // attempt to read one more byte to determine whether we reached 1873 // the 1874 // end or not. 1875 throw new IndexOutOfBoundsException( 1876 "Buffer too small to hold value"); 1877 } 1878 return nextOffset - offset; 1879 } finally { 1880 dis.close(); 1881 } 1882 } 1883 1884 /** 1885 * Stream access to value. The value part of the key-value pair pointed 1886 * by the current cursor is not cached and can only be examined once. 1887 * Calling any of the following functions more than once without moving 1888 * the cursor will result in exception: {@link #getValue(byte[])}, 1889 * {@link #getValue(byte[], int)}, {@link #getValueStream}. 1890 * 1891 * @return The input stream for reading the value. 1892 * @throws IOException 1893 */ 1894 public DataInputStream getValueStream() throws IOException { 1895 if (valueChecked == true) { 1896 throw new IllegalStateException( 1897 "Attempt to examine value multiple times."); 1898 } 1899 valueChecked = true; 1900 return valueDataInputStream; 1901 } 1902 1903 /** 1904 * Check whether it is safe to call getValueLength(). 1905 * 1906 * @return true if value length is known before hand. Values less than 1907 * the chunk size will always have their lengths known before 1908 * hand. Values that are written out as a whole (with advertised 1909 * length up-front) will always have their lengths known in 1910 * read. 1911 */ 1912 public boolean isValueLengthKnown() { 1913 return (vlen >= 0); 1914 } 1915 1916 /** 1917 * Compare the entry key to another key. Synonymous to compareTo(key, 0, 1918 * key.length). 1919 * 1920 * @param buf 1921 * The key buffer. 1922 * @return comparison result between the entry key with the input key. 1923 */ 1924 public int compareTo(byte[] buf) { 1925 return compareTo(buf, 0, buf.length); 1926 } 1927 1928 /** 1929 * Compare the entry key to another key. Synonymous to compareTo(new 1930 * ByteArray(buf, offset, length) 1931 * 1932 * @param buf 1933 * The key buffer 1934 * @param offset 1935 * offset into the key buffer. 1936 * @param length 1937 * the length of the key. 1938 * @return comparison result between the entry key with the input key. 1939 */ 1940 public int compareTo(byte[] buf, int offset, int length) { 1941 return compareTo(new ByteArray(buf, offset, length)); 1942 } 1943 1944 /** 1945 * Compare an entry with a RawComparable object. This is useful when 1946 * Entries are stored in a collection, and we want to compare a user 1947 * supplied key. 1948 */ 1949 @Override 1950 public int compareTo(RawComparable key) { 1951 return reader.compareKeys(keyBuffer, 0, getKeyLength(), key.buffer(), 1952 key.offset(), key.size()); 1953 } 1954 1955 /** 1956 * Compare whether this and other points to the same key value. 1957 */ 1958 @Override 1959 public boolean equals(Object other) { 1960 if (this == other) return true; 1961 if (!(other instanceof Entry)) return false; 1962 return ((Entry) other).compareTo(keyBuffer, 0, getKeyLength()) == 0; 1963 } 1964 1965 @Override 1966 public int hashCode() { 1967 return WritableComparator.hashBytes(keyBuffer, 0, getKeyLength()); 1968 } 1969 } 1970 1971 /** 1972 * Advance cursor by n positions within the block. 1973 * 1974 * @param n 1975 * Number of key-value pairs to skip in block. 1976 * @throws IOException 1977 */ 1978 private void inBlockAdvance(long n) throws IOException { 1979 for (long i = 0; i < n; ++i) { 1980 checkKey(); 1981 if (!valueBufferInputStream.isClosed()) { 1982 valueBufferInputStream.close(); 1983 } 1984 klen = -1; 1985 currentLocation.incRecordIndex(); 1986 } 1987 } 1988 1989 /** 1990 * Advance cursor in block until we find a key that is greater than or 1991 * equal to the input key. 1992 * 1993 * @param key 1994 * Key to compare. 1995 * @param greater 1996 * advance until we find a key greater than the input key. 1997 * @return true if we find a equal key. 1998 * @throws IOException 1999 */ 2000 private boolean inBlockAdvance(RawComparable key, boolean greater) 2001 throws IOException { 2002 int curBid = currentLocation.getBlockIndex(); 2003 long entryInBlock = reader.getBlockEntryCount(curBid); 2004 if (curBid == endLocation.getBlockIndex()) { 2005 entryInBlock = endLocation.getRecordIndex(); 2006 } 2007 2008 while (currentLocation.getRecordIndex() < entryInBlock) { 2009 int cmp = compareCursorKeyTo(key); 2010 if (cmp > 0) return false; 2011 if (cmp == 0 && !greater) return true; 2012 if (!valueBufferInputStream.isClosed()) { 2013 valueBufferInputStream.close(); 2014 } 2015 klen = -1; 2016 currentLocation.incRecordIndex(); 2017 } 2018 2019 throw new RuntimeException("Cannot find matching key in block."); 2020 } 2021 } 2022 2023 long getBlockEntryCount(int curBid) { 2024 return tfileIndex.getEntry(curBid).entries(); 2025 } 2026 2027 BlockReader getBlockReader(int blockIndex) throws IOException { 2028 return readerBCF.getDataBlock(blockIndex); 2029 } 2030 } 2031 2032 /** 2033 * Data structure representing "TFile.meta" meta block. 2034 */ 2035 static final class TFileMeta { 2036 final static String BLOCK_NAME = "TFile.meta"; 2037 final Version version; 2038 private long recordCount; 2039 private final String strComparator; 2040 private final BytesComparator comparator; 2041 2042 // ctor for writes 2043 public TFileMeta(String comparator) { 2044 // set fileVersion to API version when we create it. 2045 version = TFile.API_VERSION; 2046 recordCount = 0; 2047 strComparator = (comparator == null) ? "" : comparator; 2048 this.comparator = makeComparator(strComparator); 2049 } 2050 2051 // ctor for reads 2052 public TFileMeta(DataInput in) throws IOException { 2053 version = new Version(in); 2054 if (!version.compatibleWith(TFile.API_VERSION)) { 2055 throw new RuntimeException("Incompatible TFile fileVersion."); 2056 } 2057 recordCount = Utils.readVLong(in); 2058 strComparator = Utils.readString(in); 2059 comparator = makeComparator(strComparator); 2060 } 2061 2062 @SuppressWarnings("unchecked") 2063 static BytesComparator makeComparator(String comparator) { 2064 if (comparator.length() == 0) { 2065 // unsorted keys 2066 return null; 2067 } 2068 if (comparator.equals(COMPARATOR_MEMCMP)) { 2069 // default comparator 2070 return new BytesComparator(new MemcmpRawComparator()); 2071 } else if (comparator.startsWith(COMPARATOR_JCLASS)) { 2072 String compClassName = 2073 comparator.substring(COMPARATOR_JCLASS.length()).trim(); 2074 try { 2075 Class compClass = Class.forName(compClassName); 2076 // use its default ctor to create an instance 2077 return new BytesComparator((RawComparator<Object>) compClass 2078 .newInstance()); 2079 } catch (Exception e) { 2080 throw new IllegalArgumentException( 2081 "Failed to instantiate comparator: " + comparator + "(" 2082 + e.toString() + ")"); 2083 } 2084 } else { 2085 throw new IllegalArgumentException("Unsupported comparator: " 2086 + comparator); 2087 } 2088 } 2089 2090 public void write(DataOutput out) throws IOException { 2091 TFile.API_VERSION.write(out); 2092 Utils.writeVLong(out, recordCount); 2093 Utils.writeString(out, strComparator); 2094 } 2095 2096 public long getRecordCount() { 2097 return recordCount; 2098 } 2099 2100 public void incRecordCount() { 2101 ++recordCount; 2102 } 2103 2104 public boolean isSorted() { 2105 return !strComparator.equals(""); 2106 } 2107 2108 public String getComparatorString() { 2109 return strComparator; 2110 } 2111 2112 public BytesComparator getComparator() { 2113 return comparator; 2114 } 2115 2116 public Version getVersion() { 2117 return version; 2118 } 2119 } // END: class MetaTFileMeta 2120 2121 /** 2122 * Data structure representing "TFile.index" meta block. 2123 */ 2124 static class TFileIndex { 2125 final static String BLOCK_NAME = "TFile.index"; 2126 private ByteArray firstKey; 2127 private final ArrayList<TFileIndexEntry> index; 2128 private final ArrayList<Long> recordNumIndex; 2129 private final BytesComparator comparator; 2130 private long sum = 0; 2131 2132 /** 2133 * For reading from file. 2134 * 2135 * @throws IOException 2136 */ 2137 public TFileIndex(int entryCount, DataInput in, BytesComparator comparator) 2138 throws IOException { 2139 index = new ArrayList<TFileIndexEntry>(entryCount); 2140 recordNumIndex = new ArrayList<Long>(entryCount); 2141 int size = Utils.readVInt(in); // size for the first key entry. 2142 if (size > 0) { 2143 byte[] buffer = new byte[size]; 2144 in.readFully(buffer); 2145 DataInputStream firstKeyInputStream = 2146 new DataInputStream(new ByteArrayInputStream(buffer, 0, size)); 2147 2148 int firstKeyLength = Utils.readVInt(firstKeyInputStream); 2149 firstKey = new ByteArray(new byte[firstKeyLength]); 2150 firstKeyInputStream.readFully(firstKey.buffer()); 2151 2152 for (int i = 0; i < entryCount; i++) { 2153 size = Utils.readVInt(in); 2154 if (buffer.length < size) { 2155 buffer = new byte[size]; 2156 } 2157 in.readFully(buffer, 0, size); 2158 TFileIndexEntry idx = 2159 new TFileIndexEntry(new DataInputStream(new ByteArrayInputStream( 2160 buffer, 0, size))); 2161 index.add(idx); 2162 sum += idx.entries(); 2163 recordNumIndex.add(sum); 2164 } 2165 } else { 2166 if (entryCount != 0) { 2167 throw new RuntimeException("Internal error"); 2168 } 2169 } 2170 this.comparator = comparator; 2171 } 2172 2173 /** 2174 * @param key 2175 * input key. 2176 * @return the ID of the first block that contains key >= input key. Or -1 2177 * if no such block exists. 2178 */ 2179 public int lowerBound(RawComparable key) { 2180 if (comparator == null) { 2181 throw new RuntimeException("Cannot search in unsorted TFile"); 2182 } 2183 2184 if (firstKey == null) { 2185 return -1; // not found 2186 } 2187 2188 int ret = Utils.lowerBound(index, key, comparator); 2189 if (ret == index.size()) { 2190 return -1; 2191 } 2192 return ret; 2193 } 2194 2195 /** 2196 * @param key 2197 * input key. 2198 * @return the ID of the first block that contains key > input key. Or -1 2199 * if no such block exists. 2200 */ 2201 public int upperBound(RawComparable key) { 2202 if (comparator == null) { 2203 throw new RuntimeException("Cannot search in unsorted TFile"); 2204 } 2205 2206 if (firstKey == null) { 2207 return -1; // not found 2208 } 2209 2210 int ret = Utils.upperBound(index, key, comparator); 2211 if (ret == index.size()) { 2212 return -1; 2213 } 2214 return ret; 2215 } 2216 2217 /** 2218 * For writing to file. 2219 */ 2220 public TFileIndex(BytesComparator comparator) { 2221 index = new ArrayList<TFileIndexEntry>(); 2222 recordNumIndex = new ArrayList<Long>(); 2223 this.comparator = comparator; 2224 } 2225 2226 public RawComparable getFirstKey() { 2227 return firstKey; 2228 } 2229 2230 public Reader.Location getLocationByRecordNum(long recNum) { 2231 int idx = Utils.upperBound(recordNumIndex, recNum); 2232 long lastRecNum = (idx == 0)? 0: recordNumIndex.get(idx-1); 2233 return new Reader.Location(idx, recNum-lastRecNum); 2234 } 2235 2236 public long getRecordNumByLocation(Reader.Location location) { 2237 int blkIndex = location.getBlockIndex(); 2238 long lastRecNum = (blkIndex == 0) ? 0: recordNumIndex.get(blkIndex-1); 2239 return lastRecNum + location.getRecordIndex(); 2240 } 2241 2242 public void setFirstKey(byte[] key, int offset, int length) { 2243 firstKey = new ByteArray(new byte[length]); 2244 System.arraycopy(key, offset, firstKey.buffer(), 0, length); 2245 } 2246 2247 public RawComparable getLastKey() { 2248 if (index.size() == 0) { 2249 return null; 2250 } 2251 return new ByteArray(index.get(index.size() - 1).buffer()); 2252 } 2253 2254 public void addEntry(TFileIndexEntry keyEntry) { 2255 index.add(keyEntry); 2256 sum += keyEntry.entries(); 2257 recordNumIndex.add(sum); 2258 } 2259 2260 public TFileIndexEntry getEntry(int bid) { 2261 return index.get(bid); 2262 } 2263 2264 public void write(DataOutput out) throws IOException { 2265 if (firstKey == null) { 2266 Utils.writeVInt(out, 0); 2267 return; 2268 } 2269 2270 DataOutputBuffer dob = new DataOutputBuffer(); 2271 Utils.writeVInt(dob, firstKey.size()); 2272 dob.write(firstKey.buffer()); 2273 Utils.writeVInt(out, dob.size()); 2274 out.write(dob.getData(), 0, dob.getLength()); 2275 2276 for (TFileIndexEntry entry : index) { 2277 dob.reset(); 2278 entry.write(dob); 2279 Utils.writeVInt(out, dob.getLength()); 2280 out.write(dob.getData(), 0, dob.getLength()); 2281 } 2282 } 2283 } 2284 2285 /** 2286 * TFile Data Index entry. We should try to make the memory footprint of each 2287 * index entry as small as possible. 2288 */ 2289 static final class TFileIndexEntry implements RawComparable { 2290 final byte[] key; 2291 // count of <key, value> entries in the block. 2292 final long kvEntries; 2293 2294 public TFileIndexEntry(DataInput in) throws IOException { 2295 int len = Utils.readVInt(in); 2296 key = new byte[len]; 2297 in.readFully(key, 0, len); 2298 kvEntries = Utils.readVLong(in); 2299 } 2300 2301 // default entry, without any padding 2302 public TFileIndexEntry(byte[] newkey, int offset, int len, long entries) { 2303 key = new byte[len]; 2304 System.arraycopy(newkey, offset, key, 0, len); 2305 this.kvEntries = entries; 2306 } 2307 2308 @Override 2309 public byte[] buffer() { 2310 return key; 2311 } 2312 2313 @Override 2314 public int offset() { 2315 return 0; 2316 } 2317 2318 @Override 2319 public int size() { 2320 return key.length; 2321 } 2322 2323 long entries() { 2324 return kvEntries; 2325 } 2326 2327 public void write(DataOutput out) throws IOException { 2328 Utils.writeVInt(out, key.length); 2329 out.write(key, 0, key.length); 2330 Utils.writeVLong(out, kvEntries); 2331 } 2332 } 2333 2334 /** 2335 * Dumping the TFile information. 2336 * 2337 * @param args 2338 * A list of TFile paths. 2339 */ 2340 public static void main(String[] args) { 2341 System.out.printf("TFile Dumper (TFile %s, BCFile %s)\n", TFile.API_VERSION 2342 .toString(), BCFile.API_VERSION.toString()); 2343 if (args.length == 0) { 2344 System.out 2345 .println("Usage: java ... org.apache.hadoop.io.file.tfile.TFile tfile-path [tfile-path ...]"); 2346 System.exit(0); 2347 } 2348 Configuration conf = new Configuration(); 2349 2350 for (String file : args) { 2351 System.out.println("===" + file + "==="); 2352 try { 2353 TFileDumper.dumpInfo(file, System.out, conf); 2354 } catch (IOException e) { 2355 e.printStackTrace(System.err); 2356 } 2357 } 2358 } 2359 }