Used to write log files that represent batch commit points in structured streaming.
An abstract class for compactible metadata logs.
Class for collecting event time stats with an accumulator.
Accumulator that collects stats on event time in a batch.
Used to mark a column as containing the event time for a given record.
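In the public API this marking is applied with Dataset.withWatermark; a minimal sketch (the rate source and durations are illustrative):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, window}

val spark = SparkSession.builder().master("local[2]").appName("watermark").getOrCreate()

// The rate source emits (timestamp, value) rows; any source with a timestamp
// column would do here.
val events = spark.readStream.format("rate").option("rowsPerSecond", "5").load()

// Marks "timestamp" as the event-time column and tolerates 10 minutes of late data.
val counts = events
  .withWatermark("timestamp", "10 minutes")
  .groupBy(window(col("timestamp"), "5 minutes"))
  .count()
```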
User specified options for file streams.
A sink that writes out results to parquet files.
A special log for FileStreamSink.
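Writing with the parquet file sink from the public API requires a checkpoint location; a sketch, assuming an active SparkSession `spark` (paths are placeholders and the rate source stands in for any streaming input):

```scala
val events = spark.readStream.format("rate").load()

val query = events.writeStream
  .format("parquet")
  .option("path", "/tmp/stream-out")
  .option("checkpointLocation", "/tmp/stream-checkpoint")
  .start()
```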
A very simple source that reads files from the given directory as they appear.
Offset for the FileStreamSource.
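From the public API the file source is created with readStream; a sketch (format, schema, and path are illustrative):

```scala
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}

// The file source needs a user-supplied schema and picks up new files as
// they appear in the directory.
val schema = new StructType().add("name", StringType).add("age", IntegerType)
val people = spark.readStream
  .format("json")
  .schema(schema)
  .load("/tmp/people-input")
```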
Physical operator for executing FlatMapGroupsWithState.
A Sink that forwards all data to a ForeachWriter according to the contract defined by ForeachWriter.
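A minimal sketch of that contract from the public API (the println bodies are illustrative):

```scala
import org.apache.spark.sql.{ForeachWriter, Row}

// The contract: open() runs once per partition per batch; if it returns true,
// process() is called for every row and close() is always called afterwards.
val events = spark.readStream.format("rate").load()

val query = events.writeStream
  .foreach(new ForeachWriter[Row] {
    def open(partitionId: Long, version: Long): Boolean = true
    def process(value: Row): Unit = println(value)
    def close(errorOrNull: Throwable): Unit = ()
  })
  .start()
```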
A MetadataLog implementation based on HDFS.
A variant of QueryExecution that allows the given LogicalPlan to be executed incrementally.
A simple offset for sources that produce a single linear stream of data.
A FileCommitProtocol that tracks the list of valid files in a manifest file, used in structured streaming.
Used to query the data that has been written into a MemorySink.
A sink that stores the results in memory.
A Source that produces values stored in memory as they are added by the user.
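MemoryStream and the "memory" sink are mainly used in Spark's own tests; a sketch of the round trip (internal API, subject to change across versions):

```scala
import org.apache.spark.sql.execution.streaming.MemoryStream

implicit val sqlCtx = spark.sqlContext
import spark.implicits._

// MemoryStream is a Source backed by an in-memory buffer; the "memory" sink
// stores results in a queryable in-memory table.
val input = MemoryStream[Int]
input.addData(1, 2, 3)

val query = input.toDF().writeStream
  .format("memory")
  .queryName("numbers")
  .start()

query.processAllAvailable()
spark.table("numbers").show()
```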
A general MetadataLog that supports storing a metadata object for each batch, querying the latest batch id, and retrieving or purging the metadata of past batches.
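For reference, a sketch of the trait's rough shape in the Spark 2.x source (signatures may differ across versions):

```scala
trait MetadataLog[T] {
  /** Store metadata for a batch; returns false if the batch was already logged. */
  def add(batchId: Long, metadata: T): Boolean
  /** The metadata for a given batch, if it exists. */
  def get(batchId: Long): Option[T]
  /** Metadata for batches in the given (inclusive, optional) id range. */
  def get(startId: Option[Long], endId: Option[Long]): Array[(Long, T)]
  /** The id and metadata of the latest batch, if any. */
  def getLatest(): Option[(Long, T)]
  /** Remove all metadata for batches older than the threshold. */
  def purge(thresholdBatchId: Long): Unit
}
```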
A FileIndex that generates the list of files to process by reading them from the metadata log files generated by the FileStreamSink.
Serves metrics from an org.apache.spark.sql.streaming.StreamingQuery to Codahale/DropWizard metrics.
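This reporting is off by default and is enabled with a configuration flag (a one-line sketch, assuming an active SparkSession `spark`):

```scala
// Publishes per-query metrics (input rate, processing rate, latency) through
// Spark's DropWizard-based MetricsSystem.
spark.conf.set("spark.sql.streaming.metricsEnabled", "true")
```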
An offset is a monotonically increasing metric used to track progress in the computation of a stream.
An ordered collection of offsets, used to track the progress of processing data from one or more Sources that are present in a streaming query.
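Custom sources define their own Offset subclass; the one hard requirement is a stable JSON representation (a hypothetical example, CounterOffset is not a real Spark class):

```scala
import org.apache.spark.sql.execution.streaming.Offset

// Hypothetical offset for a source that tracks a single increasing counter.
case class CounterOffset(n: Long) extends Offset {
  override val json: String = n.toString
}
```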
This class is used to log offsets to persistent files in HDFS.
Contains metadata associated with an OffsetSeq.
A trigger executor that runs a single batch only, then terminates.
Used to identify the state store for a given operator.
A trigger executor that runs a batch every intervalMs milliseconds.
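From the public API this executor is selected with a processing-time trigger (Spark 2.2+; values are illustrative):

```scala
import org.apache.spark.sql.streaming.Trigger

val events = spark.readStream.format("rate").load()

// Runs a batch every 10 seconds; if a batch overruns the interval, the next
// one starts as soon as the previous finishes.
val query = events.writeStream
  .format("console")
  .trigger(Trigger.ProcessingTime("10 seconds"))
  .start()
```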
Responsible for continually reporting statistics about the amount of data processed as well as latency for a streaming query.
A source that generates incrementing long values with timestamps.
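A sketch of creating it via the public "rate" format (the rate option is illustrative):

```scala
// Each row carries a `timestamp` and a monotonically increasing `value`.
val rates = spark.readStream
  .format("rate")
  .option("rowsPerSecond", "10")
  .load()
```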
Used when loading a JSON serialized offset from external storage.
An interface for systems that can collect the results of a streaming query.
The status of a file written out by FileStreamSink.
A source of continually arriving data for a streaming query.
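The Sink and Source interfaces above are small; the following sketch mirrors their rough shape in the Spark 2.x source (signatures may differ across versions):

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.sql.types.StructType

trait Source {
  /** The schema of the data produced by this source. */
  def schema: StructType
  /** The highest offset available, or None if no data has arrived yet. */
  def getOffset: Option[Offset]
  /** The data between `start` (exclusive, None = beginning) and `end` (inclusive). */
  def getBatch(start: Option[Offset], end: Offset): DataFrame
  /** Informs the source that data up to `end` has been processed and may be discarded. */
  def commit(end: Offset): Unit = {}
  /** Release any resources held by the source. */
  def stop(): Unit
}

trait Sink {
  /** Adds a batch of data; must be idempotent when retried with the same batchId. */
  def addBatch(batchId: Long, data: DataFrame): Unit
}
```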
States for StreamExecution's lifecycle.
An operator that reads from a StateStore.
For each input tuple, the key is calculated and the value from the StateStore is added to the stream (in addition to the input tuple) if present.
For each input tuple, the key is calculated and the tuple is put into the StateStore.
An operator that writes to a StateStore.
An operator that reads or writes state from the StateStore.
Manages the execution of a streaming Spark SQL query that is occurring in a separate thread.
A special thread to run the stream query.
Contains metadata associated with a StreamingQuery.
A helper class that looks like a Map[Source, Offset].
Physical operator for executing streaming Deduplicate.
Used to link a streaming Source of data into an org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.
A bus to forward events to StreamingQueryListeners.
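Listeners are registered through the StreamingQueryManager; a minimal sketch (the println bodies are illustrative):

```scala
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

spark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit =
    println(s"query started: ${event.id}")
  override def onQueryProgress(event: QueryProgressEvent): Unit =
    println(event.progress.json)
  override def onQueryTerminated(event: QueryTerminatedEvent): Unit =
    println(s"query terminated: ${event.id}")
})
```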
Wraps the non-serializable StreamExecution to make the query serializable, as it is easy for it to get captured with normal usage.
Used to link a streaming DataSource into an org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.
A dummy physical plan for StreamingRelation to support org.apache.spark.sql.Dataset.explain.
A source that reads text lines through a TCP socket, designed only for tutorials and debugging.
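A typical tutorial usage (run `nc -lk 9999` in a shell first; host and port are illustrative):

```scala
// The socket source yields one string column, "value"; it offers no fault
// tolerance and should not be used in production.
val lines = spark.readStream
  .format("socket")
  .option("host", "localhost")
  .option("port", "9999")
  .load()
```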
An operator that supports event-time watermarks.
A Trigger that processes only one batch of data in a streaming query and then terminates the query.
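From the public API this is Trigger.Once (Spark 2.2+); a sketch:

```scala
import org.apache.spark.sql.streaming.Trigger

// Processes whatever input is available as one batch, then stops the query.
val events = spark.readStream.format("rate").load()
val query = events.writeStream
  .format("console")
  .trigger(Trigger.Once())
  .start()
```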