public class OrcBatchRecordReader extends Object
Constructor and Description |
---|
OrcBatchRecordReader(Map<Integer,Type> includedColumns,
OrcPredicate predicate,
long numberOfRows,
List<StripeInformation> fileStripes,
List<ColumnStatistics> fileStats,
List<StripeStatistics> stripeStats,
OrcDataSource orcDataSource,
long splitOffset,
long splitLength,
List<OrcType> types,
Optional<OrcDecompressor> decompressor,
int rowsInRowGroup,
org.joda.time.DateTimeZone hiveStorageTimeZone,
PostScript.HiveWriterVersion hiveWriterVersion,
MetadataReader metadataReader,
io.airlift.units.DataSize maxMergeDistance,
io.airlift.units.DataSize tinyStripeThreshold,
io.airlift.units.DataSize maxBlockSize,
Map<String,io.airlift.slice.Slice> userMetadata,
AggregatedMemoryContext systemMemoryUsage,
Optional<OrcWriteValidation> writeValidation,
int initialBatchSize,
StripeMetadataSource stripeMetadataSource,
HiveFileContext hiveFileContext) |
Modifier and Type | Method and Description |
---|---|
protected void |
adjustMaxBatchSize(long averageRowBytes) |
protected void |
batchRead(int batchSize) |
void |
close() |
protected static StreamDescriptor |
createStreamDescriptor(String parentStreamName,
String fieldName,
int typeId,
List<OrcType> types,
OrcDataSource dataSource) |
long |
getFilePosition()
Returns the row position relative to the start of the file.
|
long |
getFileRowCount()
Returns the total number of rows in the file.
|
long |
getMaxCombinedBytesPerRow()
Returns the sum of the sizes of the largest cell in each column.
|
protected int |
getNextRowInGroup() |
long |
getReaderPosition()
Returns the row position within the stripes being read by this reader.
|
long |
getReaderRowCount()
Returns the total number of rows that can possibly be read by this reader.
|
protected long |
getRetainedSizeInBytes() |
long |
getSplitLength() |
protected T[] |
getStreamReaders() |
Map<String,io.airlift.slice.Slice> |
getUserMetadata() |
boolean |
isColumnPresent(int hiveColumnIndex) |
int |
nextBatch() |
protected int |
prepareNextBatch() |
Block |
readBlock(int columnIndex) |
protected boolean |
shouldValidateWritePageChecksum() |
protected void |
updateMaxCombinedBytesPerRow(int columnIndex,
Block block) |
protected void |
validateWritePageChecksum(Page page) |
public OrcBatchRecordReader(Map<Integer,Type> includedColumns, OrcPredicate predicate, long numberOfRows, List<StripeInformation> fileStripes, List<ColumnStatistics> fileStats, List<StripeStatistics> stripeStats, OrcDataSource orcDataSource, long splitOffset, long splitLength, List<OrcType> types, Optional<OrcDecompressor> decompressor, int rowsInRowGroup, org.joda.time.DateTimeZone hiveStorageTimeZone, PostScript.HiveWriterVersion hiveWriterVersion, MetadataReader metadataReader, io.airlift.units.DataSize maxMergeDistance, io.airlift.units.DataSize tinyStripeThreshold, io.airlift.units.DataSize maxBlockSize, Map<String,io.airlift.slice.Slice> userMetadata, AggregatedMemoryContext systemMemoryUsage, Optional<OrcWriteValidation> writeValidation, int initialBatchSize, StripeMetadataSource stripeMetadataSource, HiveFileContext hiveFileContext) throws OrcCorruptionException
Throws: OrcCorruptionException
public int nextBatch() throws IOException
Throws: IOException
public Block readBlock(int columnIndex) throws IOException
Throws: IOException
protected long getRetainedSizeInBytes()
public long getMaxCombinedBytesPerRow()
protected void updateMaxCombinedBytesPerRow(int columnIndex, Block block)
protected T[] getStreamReaders()
public long getFilePosition()
public long getFileRowCount()
public long getReaderPosition()
public long getReaderRowCount()
public long getSplitLength()
public void close() throws IOException
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
Throws: IOException
public boolean isColumnPresent(int hiveColumnIndex)
protected int getNextRowInGroup()
protected void batchRead(int batchSize)
protected void adjustMaxBatchSize(long averageRowBytes)
protected int prepareNextBatch() throws IOException
Throws: IOException
protected static StreamDescriptor createStreamDescriptor(String parentStreamName, String fieldName, int typeId, List<OrcType> types, OrcDataSource dataSource)
protected boolean shouldValidateWritePageChecksum()
protected void validateWritePageChecksum(Page page)
Copyright © 2012–2020. All rights reserved.