KafkaSparkStructuredStreamingReader

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def createStructuredStream(etl: StructuredStreamingETLModel, streamingReaderModel: StreamingReaderModel)(implicit ss: SparkSession): DataFrame

Creates a streaming DataFrame from a Kafka streaming source.
Creates a streaming DataFrame from a Kafka streaming source.
If all the input topics share the same schema, the returned DataFrame will contain a column named "kafkaMetadata" with the message metadata, plus the message contents either as a single column named "value" or as multiple columns named after the value fields, depending on the topic data type. If the input topics do not share the same schema, the returned DataFrame will contain a column named "kafkaMetadata" with the message metadata, plus one column per topic holding that topic's content; each of these columns is named after its topic, with the name escaped by the function MultiTopicModel.topicNameToColumnName(). This means that if 5 topic models with different schemas are read, the output DataFrame will contain 6 columns, and for any given message only the kafkaMetadata column and the column of the topic that message belongs to will have a non-null value, as in the following example (shown here with two topics):
```
+--------------------+--------------------+-------------------------+
|       kafkaMetadata|     test_json_topic|testcheckpoint_avro_topic|
+--------------------+--------------------+-------------------------+
|[45, [], test_jso...|[45, 45, [field1_...|                     null|
|[12, [], testchec...|                null|     [12, 77, [field1_...|
+--------------------+--------------------+-------------------------+
```
The "kafkaMetadata" column contains the following fields:
- key: bytes
- headers: array of {headerKey: string, headerValue: bytes}
- topic: string
- partition: int
- offset: long
- timestamp: timestamp
- timestampType: int
The behaviour of the message contents column(s) is the following:
- the "avro" and "json" topic data types output the columns specified by their schemas
- the "plaintext" and "bytes" topic data types output a "value" column with the contents as string or bytes respectively
Definition Classes
KafkaSparkStructuredStreamingReader → SparkStructuredStreamingReader
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
val logger: WaspLogger

Attributes
protected
Definition Classes
Logging
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package kafka

object KafkaSparkStructuredStreamingReader extends SparkStructuredStreamingReader with Logging

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

def createStructuredStream(etl: StructuredStreamingETLModel, streamingReaderModel: StreamingReaderModel)(implicit ss: SparkSession): DataFrame

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

val logger: WaspLogger

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Logging

Inherited from SparkStructuredStreamingReader

Inherited from AnyRef

Inherited from Any

Ungrouped