class ApplyInPandasWithStatePythonRunner extends BasePythonRunner[InType, OutType] with PythonArrowInput[InType] with PythonArrowOutput[OutType]
A variant implementation of ArrowPythonRunner to serve the operation applyInPandasWithState.
Unlike the normal ArrowPythonRunner, in which both input and output (executor <-> python worker) are InternalRow, applyInPandasWithState carries side data (state information) in both input and output along with the data, which requires a different struct on the Arrow RecordBatch.
- Alphabetic
- By Inheritance
- ApplyInPandasWithStatePythonRunner
- PythonArrowOutput
- PythonArrowInput
- BasePythonRunner
- Logging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new ApplyInPandasWithStatePythonRunner(funcs: Seq[ChainedPythonFunctions], evalType: Int, argOffsets: Array[Array[Int]], inputSchema: StructType, timeZoneId: String, initialWorkerConf: Map[String, String], stateEncoder: ExpressionEncoder[Row], keySchema: StructType, outputSchema: StructType, stateValueSchema: StructType, pythonMetrics: Map[String, SQLMetric])
Type Members
-
class
MonitorThread extends Thread
- Definition Classes
- BasePythonRunner
-
abstract
class
ReaderIterator extends Iterator[OUT]
- Definition Classes
- BasePythonRunner
-
class
WriterMonitorThread extends Thread
- Definition Classes
- BasePythonRunner
-
abstract
class
WriterThread extends Thread
- Definition Classes
- BasePythonRunner
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
val
accumulator: PythonAccumulatorV2
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
val
argOffsets: Array[Array[Int]]
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
val
authSocketTimeout: Long
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
val
bufferSize: Int
- Definition Classes
- ApplyInPandasWithStatePythonRunner → BasePythonRunner
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
compute(inputIterator: Iterator[InType], partitionIndex: Int, context: TaskContext): Iterator[OutType]
- Definition Classes
- BasePythonRunner
-
def
deserializeColumnarBatch(batch: ColumnarBatch, schema: StructType): OutType
Deserialize ColumnarBatch received from the Python worker to produce the output.
Deserialize the ColumnarBatch received from the Python worker to produce the output. Schema info for the given ColumnarBatch is also provided.
- Attributes
- protected
- Definition Classes
- ApplyInPandasWithStatePythonRunner → PythonArrowOutput
-
val
envVars: Map[String, String]
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
val
evalType: Int
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
val
funcs: Seq[ChainedPythonFunctions]
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
handleMetadataAfterExec(stream: DataInputStream): Unit
- Attributes
- protected
- Definition Classes
- PythonArrowOutput
-
def
handleMetadataBeforeExec(stream: DataOutputStream): Unit
This method sends out the additional metadata before sending out actual data.
This method sends out the additional metadata before sending out actual data.
Specifically, this class overrides this method to also write the schema for state value.
- Attributes
- protected
- Definition Classes
- ApplyInPandasWithStatePythonRunner → PythonArrowInput
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
newReaderIterator(stream: DataInputStream, writerThread: WriterThread, startTime: Long, env: SparkEnv, worker: Socket, pid: Option[Int], releasedOrClosed: AtomicBoolean, context: TaskContext): Iterator[OutType]
- Attributes
- protected
- Definition Classes
- PythonArrowOutput
-
def
newWriterThread(env: SparkEnv, worker: Socket, inputIterator: Iterator[InType], partitionIndex: Int, context: TaskContext): WriterThread
- Attributes
- protected
- Definition Classes
- PythonArrowInput
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
val
pythonExec: String
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
val
pythonMetrics: Map[String, SQLMetric]
- Definition Classes
- ApplyInPandasWithStatePythonRunner → PythonArrowOutput → PythonArrowInput
-
val
pythonVer: String
- Attributes
- protected
- Definition Classes
- BasePythonRunner
-
val
schema: StructType
- Attributes
- protected
- Definition Classes
- ApplyInPandasWithStatePythonRunner → PythonArrowInput
-
val
simplifiedTraceback: Boolean
- Definition Classes
- ApplyInPandasWithStatePythonRunner → BasePythonRunner
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
val
timeZoneId: String
- Attributes
- protected
- Definition Classes
- ApplyInPandasWithStatePythonRunner → PythonArrowInput
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
val
workerConf: Map[String, String]
- Attributes
- protected
- Definition Classes
- ApplyInPandasWithStatePythonRunner → PythonArrowInput
-
def
writeIteratorToArrowStream(root: VectorSchemaRoot, writer: ArrowStreamWriter, dataOut: DataOutputStream, inputIterator: Iterator[InType]): Unit
Read the (key, state, values) from the input iterator, construct Arrow RecordBatches, and write the constructed RecordBatches to the writer.
Read the (key, state, values) from the input iterator, construct Arrow RecordBatches, and write the constructed RecordBatches to the writer.
See ApplyInPandasWithStateWriter for more details.
- Attributes
- protected
- Definition Classes
- ApplyInPandasWithStatePythonRunner → PythonArrowInput