ColumnDeltaEncoder

Instance Constructors

new ColumnDeltaEncoder(hierarchyDepth: Int)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final var _lowerDecimal: Decimal

Attributes
protected
Definition Classes
ColumnEncoder
final var _lowerDouble: Double

Attributes
protected
Definition Classes
ColumnEncoder
final var _lowerLong: Long

Attributes
protected
Definition Classes
ColumnEncoder
final var _lowerStr: UTF8String

Attributes
protected
Definition Classes
ColumnEncoder
final var _upperDecimal: Decimal

Attributes
protected
Definition Classes
ColumnEncoder
final var _upperDouble: Double

Attributes
protected
Definition Classes
ColumnEncoder
final var _upperLong: Long

Attributes
protected
Definition Classes
ColumnEncoder
final var _upperStr: UTF8String

Attributes
protected
Definition Classes
ColumnEncoder
final var allocator: BufferAllocator

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
final def asInstanceOf[T0]: T0

Definition Classes
Any
final var baseDataOffset: Long

Attributes
protected
Definition Classes
ColumnEncoder
final def baseOffset: Long

Definition Classes
ColumnEncoder
final var baseTypeOffset: Long

Temporary offset results that are read by generated code immediately after initializeComplexType, so this is not an issue for nested types.

Attributes
protected
Definition Classes
ColumnEncoder
final def buffer: AnyRef

Definition Classes
ColumnEncoder
final def clearSource(newSize: Int, releaseData: Boolean): Unit

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def close(): Unit

Close and relinquish all resources of this encoder. The encoder may no longer be usable after this call.

Definition Classes
ColumnEncoder
final var columnBeginPosition: Long

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
final var columnBytes: AnyRef

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
final var columnData: ByteBuffer

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
final var columnEndPosition: Long

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
final def copyTo(dest: ByteBuffer, srcOffset: Int, endOffset: Int): Unit

Attributes
protected
Definition Classes
ColumnEncoder
def defaultSize(dataType: DataType): Int

Definition Classes
ColumnEncoder
def encodedSize(cursor: Long, dataBeginPosition: Long): Long

The final size of the encoder column (excluding header and nulls), which should match the size occupied after finish but is computed without writing anything.

Definition Classes
ColumnEncoder
final def ensureCapacity(cursor: Long, required: Int): Long

Definition Classes
ColumnEncoder
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def expand(cursor: Long, required: Int): Long

Expand the underlying bytes if required and return the new cursor.

Attributes
protected
Definition Classes
ColumnEncoder
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def finish(encoderCursor: Long, numBaseRows: Int): ByteBuffer
def finish(encoderCursor: Long): ByteBuffer

Finish encoding the current column and return the data as a ByteBuffer. The encoder can be reused again for new column data of the same type.

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def flushWithoutFinish(cursor: Long): Long

Flush any pending data when finish is not being invoked explicitly.

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
final var forComplexType: Boolean

Attributes
protected
Definition Classes
ColumnEncoder
final def getBaseDataOffset: Long

Definition Classes
ColumnEncoder
final def getBaseTypeOffset: Long

Definition Classes
ColumnEncoder
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getMaxSizeForHierarchy(numColumnRows: Int): Int
def getNumNullWords: Int

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def getRealEncoder: ColumnEncoder
def hashCode(): Int

Definition Classes
AnyRef → Any
val hierarchyDepth: Int
def initSizeInBytes(dataType: DataType, initSize: Long, defSize: Int): Long

Definition Classes
ColumnEncoder
def initialize(dataType: DataType, nullable: Boolean, initSize: Int, withHeader: Boolean, allocator: BufferAllocator, minBufferSize: Int = 1): Long

Initialize this ColumnEncoder.
dataType
DataType of the field to be written
nullable
True if the field is nullable, false otherwise
initSize
Initial estimated number of elements to be written
withHeader
True if header is to be written to data (typeId etc)
allocator
the BufferAllocator to use for the data
minBufferSize
the minimum size of initial buffer to use (ignored if <= 0)
returns
initial position of the cursor that caller must use to write

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
final def initialize(dataType: DataType, nullable: Boolean, initSize: Int, withHeader: Boolean, allocator: BufferAllocator): Long

Initialize this ColumnEncoder.
dataType
DataType of the field to be written
nullable
True if the field is nullable, false otherwise
initSize
Initial estimated number of elements to be written
withHeader
True if header is to be written to data (typeId etc)
allocator
the BufferAllocator to use for the data
returns
initial position of the cursor that caller must use to write

Definition Classes
ColumnEncoder
final def initialize(field: StructField, initSize: Int, withHeader: Boolean, allocator: BufferAllocator): Long

Definition Classes
ColumnEncoder
final def initialize(field: StructField, initSize: Int, withHeader: Boolean): Long

Definition Classes
ColumnEncoder
final def initializeComplexType(cursor: Long, numElements: Int, skipBytes: Int, writeNumElements: Boolean): Long

Complex types are written similar to UnsafeRows while respecting platform endianness (format is always little endian) so appropriate for storage. There are also other minor differences related to size writing and interval type handling. The general layout looks like below:
```
.--------------------------- Optional total size including itself (4 bytes)
|   .----------------------- Optional number of elements (4 bytes)
|   |   .------------------- Null bitset longs (8 x (N / 8) bytes)
|   |   |
|   |   |     .------------- Offsets+Sizes of elements (8 x N bytes)
|   |   |     |     .------- Variable length elements
V   V   V     V     V
+---+---+-----+-------------+
|   |   | ... | ... ... ... |
+---+---+-----+-------------+
 \-----/ \-----------------/
  header      body
```
The above generic layout is used for ARRAY and STRUCT types.
The total size of the data is written for top-level complex types. Nested complex objects write their sizes in the "Offsets+Sizes" portion in the respective parent object.
ARRAY types also write the number of elements in the array in the header while STRUCT types skip it since it is fixed in the meta-data.
The null bitset follows the header. To keep the reads aligned at 8-byte boundaries while preserving space, the implementation combines the header and the null bitset portion, then pads them together to an 8-byte boundary (in particular, it treats the header as some additional empty fields in the null bitset itself).
After this follows the "Offsets+Sizes" portion, which keeps the offset and size for variable length elements. Fixed length elements less than 8 bytes in size are written directly in the offset+size portion. Variable length elements have their offsets (from the start of this array) and sizes encoded in this portion as a long (4 bytes for each of offset and size). Fixed width elements that are greater than 8 bytes are encoded like variable length elements. CalendarInterval is currently the only type of that nature; its "months" portion is encoded into the size while the "microseconds" portion is written into the variable length part.
MAP types are written as an ARRAY of keys followed by an ARRAY of values, like in Spark. To keep things simpler, both ARRAYs always have the optional size header at their respective starts, which together determine the total size of the encoded MAP object. For nested MAP types, the total size is skipped from the "Offsets+Sizes" portion and only the offset is written (which is the start of the key ARRAY).
Definition Classes
ColumnEncoder
def initializeLimits(): Unit

Attributes
protected
Definition Classes
ColumnEncoder
def initializeNulls(initSize: Int): Int

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnDeltaEncoder → ColumnEncoder
final def isAllocatorFinal: Boolean

Attributes
protected
Definition Classes
ColumnEncoder
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isNullable: Boolean

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
final def lowerDecimal: Decimal

Definition Classes
ColumnEncoder
final def lowerDouble: Double

Definition Classes
ColumnEncoder
final def lowerLong: Long

Definition Classes
ColumnEncoder
final def lowerString: UTF8String

Definition Classes
ColumnEncoder
def merge(newValue: ByteBuffer, existingValue: ByteBuffer, existingIsDelta: Boolean, field: StructField): ByteBuffer
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def nullCount: Int

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def offset(cursor: Long): Long

Definition Classes
ColumnEncoder
final def releaseForReuse(newSize: Int): Unit

Attributes
protected
Definition Classes
ColumnEncoder
final var reuseUsedSize: Int

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
final def setAllocator(allocator: BufferAllocator): Unit

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
final def setOffsetAndSize(cursor: Long, fieldOffset: Long, baseOffset: Long, size: Int): Unit

Definition Classes
ColumnEncoder
Annotations
@inline()
final def setSource(buffer: ByteBuffer, releaseOld: Boolean): Unit

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnEncoder
def setUpdatePosition(position: Int): Unit
def sizeInBytes(cursor: Long): Long

Definition Classes
ColumnEncoder
final def storageAllocator: BufferAllocator

Get the allocator for the final data to be sent for storage. It is on-heap for now in embedded mode and off-heap in connector mode, to minimize copying in both cases. This should be changed to use the allocator matching the storage being used by the column store in embedded mode.

Attributes
protected
Definition Classes
ColumnEncoder
def supports(dataType: DataType): Boolean

Definition Classes
ColumnDeltaEncoder → ColumnEncoding
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
def typeId: Int

Definition Classes
ColumnDeltaEncoder → ColumnEncoding
final def updateDecimalStats(value: Decimal): Unit

Attributes
protected
Definition Classes
ColumnEncoder
final def updateDoubleStats(value: Double): Unit

Attributes
protected
Definition Classes
ColumnEncoder
final def updateLongStats(value: Long): Unit

Attributes
protected
Definition Classes
ColumnEncoder
final def updateStringStats(value: UTF8String): Unit

Attributes
protected
Definition Classes
ColumnEncoder
final def upperDecimal: Decimal

Definition Classes
ColumnEncoder
final def upperDouble: Double

Definition Classes
ColumnEncoder
final def upperLong: Long

Definition Classes
ColumnEncoder
final def upperString: UTF8String

Definition Classes
ColumnEncoder
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def writeBinary(cursor: Long, value: Array[Byte]): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeBoolean(cursor: Long, value: Boolean): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeBooleanUnchecked(cursor: Long, value: Boolean): Long

Definition Classes
ColumnEncoder
def writeByte(cursor: Long, value: Byte): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeByteUnchecked(cursor: Long, value: Byte): Long

Definition Classes
ColumnEncoder
def writeDate(cursor: Long, value: Int): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeDecimal(cursor: Long, value: Decimal, position: Int, precision: Int, scale: Int): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeDouble(cursor: Long, value: Double): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeDoubleUnchecked(cursor: Long, value: Double): Long

Definition Classes
ColumnEncoder
def writeFloat(cursor: Long, value: Float): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeFloatUnchecked(cursor: Long, value: Float): Long

Definition Classes
ColumnEncoder
def writeInt(cursor: Long, value: Int): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeIntUnchecked(cursor: Long, value: Int): Long

Definition Classes
ColumnEncoder
def writeInternals(columnBytes: AnyRef, cursor: Long): Long

Write any internal structures (e.g. dictionary) of the encoder that would normally be written by finish after the header and null bit mask.

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeInterval(cursor: Long, value: CalendarInterval): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeIsNull(position: Int): Unit

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeLong(cursor: Long, value: Long): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeLongDecimal(cursor: Long, value: Decimal, position: Int, precision: Int, scale: Int): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeLongUnchecked(cursor: Long, value: Long): Long

Definition Classes
ColumnEncoder
def writeNulls(columnBytes: AnyRef, cursor: Long, numWords: Int): Long

Attributes
protected[org.apache.spark.sql]
Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeShort(cursor: Long, value: Short): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeShortUnchecked(cursor: Long, value: Short): Long

Definition Classes
ColumnEncoder
final def writeStructBinary(cursor: Long, value: Array[Byte], fieldOffset: Long, baseOffset: Long): Long

Definition Classes
ColumnEncoder
final def writeStructDecimal(cursor: Long, value: Decimal, fieldOffset: Long, baseOffset: Long): Long

Definition Classes
ColumnEncoder
final def writeStructInterval(cursor: Long, value: CalendarInterval, fieldOffset: Long, baseOffset: Long): Long

Definition Classes
ColumnEncoder
final def writeStructUTF8String(cursor: Long, value: UTF8String, fieldOffset: Long, baseOffset: Long): Long

Definition Classes
ColumnEncoder
def writeTimestamp(cursor: Long, value: Long): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeUTF8String(cursor: Long, value: UTF8String): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder
def writeUnsafeData(cursor: Long, baseObject: AnyRef, baseOffset: Long, numBytes: Int): Long

Definition Classes
ColumnDeltaEncoder → ColumnEncoder

Related Doc: package encoding

final class ColumnDeltaEncoder extends ColumnEncoder

Instance Constructors

new ColumnDeltaEncoder(hierarchyDepth: Int)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final var _lowerDecimal: Decimal

final var _lowerDouble: Double

final var _lowerLong: Long

final var _lowerStr: UTF8String

final var _upperDecimal: Decimal

final var _upperDouble: Double

final var _upperLong: Long

final var _upperStr: UTF8String

final var allocator: BufferAllocator

final def asInstanceOf[T0]: T0

final var baseDataOffset: Long

final def baseOffset: Long

final var baseTypeOffset: Long

final def buffer: AnyRef

final def clearSource(newSize: Int, releaseData: Boolean): Unit

def clone(): AnyRef

def close(): Unit

final var columnBeginPosition: Long

final var columnBytes: AnyRef

final var columnData: ByteBuffer

final var columnEndPosition: Long

final def copyTo(dest: ByteBuffer, srcOffset: Int, endOffset: Int): Unit

def defaultSize(dataType: DataType): Int

def encodedSize(cursor: Long, dataBeginPosition: Long): Long

final def ensureCapacity(cursor: Long, required: Int): Long

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

final def expand(cursor: Long, required: Int): Long

def finalize(): Unit

def finish(encoderCursor: Long, numBaseRows: Int): ByteBuffer

def finish(encoderCursor: Long): ByteBuffer

def flushWithoutFinish(cursor: Long): Long

final var forComplexType: Boolean

final def getBaseDataOffset: Long

final def getBaseTypeOffset: Long

final def getClass(): Class[_]

def getMaxSizeForHierarchy(numColumnRows: Int): Int

def getNumNullWords: Int

def getRealEncoder: ColumnEncoder

def hashCode(): Int

val hierarchyDepth: Int

def initSizeInBytes(dataType: DataType, initSize: Long, defSize: Int): Long

def initialize(dataType: DataType, nullable: Boolean, initSize: Int, withHeader: Boolean, allocator: BufferAllocator, minBufferSize: Int = 1): Long

final def initialize(dataType: DataType, nullable: Boolean, initSize: Int, withHeader: Boolean, allocator: BufferAllocator): Long

final def initialize(field: StructField, initSize: Int, withHeader: Boolean, allocator: BufferAllocator): Long

final def initialize(field: StructField, initSize: Int, withHeader: Boolean): Long

final def initializeComplexType(cursor: Long, numElements: Int, skipBytes: Int, writeNumElements: Boolean): Long

def initializeLimits(): Unit

def initializeNulls(initSize: Int): Int

final def isAllocatorFinal: Boolean

final def isInstanceOf[T0]: Boolean

def isNullable: Boolean

final def lowerDecimal: Decimal

final def lowerDouble: Double

final def lowerLong: Long

final def lowerString: UTF8String

def merge(newValue: ByteBuffer, existingValue: ByteBuffer, existingIsDelta: Boolean, field: StructField): ByteBuffer

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def nullCount: Int

def offset(cursor: Long): Long

final def releaseForReuse(newSize: Int): Unit

final var reuseUsedSize: Int

final def setAllocator(allocator: BufferAllocator): Unit

final def setOffsetAndSize(cursor: Long, fieldOffset: Long, baseOffset: Long, size: Int): Unit

final def setSource(buffer: ByteBuffer, releaseOld: Boolean): Unit

def setUpdatePosition(position: Int): Unit

def sizeInBytes(cursor: Long): Long

final def storageAllocator: BufferAllocator

def supports(dataType: DataType): Boolean