io.smartdatalake.workflow

SparkSubFeed

case class SparkSubFeed(dataFrame: Option[DataFrame], dataObjectId: DataObjectId, partitionValues: Seq[PartitionValues], isDAGStart: Boolean = false, isSkipped: Boolean = false, isDummy: Boolean = false, filter: Option[String] = None) extends SubFeed with Product with Serializable

A SparkSubFeed is used to transport DataFrames between Actions. A construction sketch is given after the parameter list below.

dataFrame

Spark DataFrame to be processed. The DataFrame should not be saved to state (@transient).

dataObjectId

Id of the DataObject this SubFeed corresponds to.

partitionValues

Values of partitions transported by this SubFeed.

isDAGStart

True if this SubFeed is a start node of the DAG.

isSkipped

True if this SubFeed is the result of a skipped Action.

isDummy

True if this SubFeed only contains a dummy DataFrame. Dummy DataFrames can be used to validate the lineage in the init phase, but not in the exec phase.

filter

A Spark SQL filter expression. This is used by SparkIncrementalMode.
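
A minimal construction sketch. The import paths for DataObjectId and PartitionValues, the DataObject id "stg-table", and the partition column "dt" are assumptions for illustration, not something this page documents:

    import org.apache.spark.sql.SparkSession
    import io.smartdatalake.workflow.SparkSubFeed
    import io.smartdatalake.config.SdlConfigObject.DataObjectId // assumed import path
    import io.smartdatalake.util.hdfs.PartitionValues           // assumed import path

    val session = SparkSession.builder().master("local[*]").getOrCreate()
    import session.implicits._

    val df = Seq(("a", 1), ("b", 2)).toDF("id", "value")

    // Transport df towards the hypothetical DataObject "stg-table",
    // restricted to one partition, with an optional incremental filter.
    val subFeed = SparkSubFeed(
      dataFrame = Some(df),
      dataObjectId = DataObjectId("stg-table"),
      partitionValues = Seq(PartitionValues(Map("dt" -> "20240101"))),
      filter = Some("value > 0") // Spark SQL expression, as used by SparkIncrementalMode
    )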

Linear Supertypes
Serializable, Serializable, Product, Equals, SubFeed, SmartDataLakeLogger, DAGResult, AnyRef, Any

Instance Constructors

  1. new SparkSubFeed(dataFrame: Option[DataFrame], dataObjectId: DataObjectId, partitionValues: Seq[PartitionValues], isDAGStart: Boolean = false, isSkipped: Boolean = false, isDummy: Boolean = false, filter: Option[String] = None)

Value Members

  1. final def !=(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0

    Definition Classes
    Any
  5. def breakLineage(implicit session: SparkSession, context: ActionPipelineContext): SparkSubFeed

    Break lineage. This means discarding an existing DataFrame or list of FileRefs, so that the data is requested again from the DataObject. On the one hand this can be used to break long DataFrame lineages over multiple Actions and instead reread the data from an intermediate table; on the other hand it is needed if partition values or the filter condition are changed.

    Definition Classes
    SparkSubFeed → SubFeed
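
    A hedged usage sketch, assuming an implicit SparkSession and ActionPipelineContext are in scope (as inside an Action) and reusing the subFeed value from the construction sketch above:

        // implicit val session: SparkSession = ...
        // implicit val context: ActionPipelineContext = ...

        // Discard the transported DataFrame so that downstream processing
        // requests the data from the DataObject again.
        val broken: SparkSubFeed = subFeed.breakLineage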
  6. def clearDAGStart(): SparkSubFeed

    Definition Classes
    SparkSubFeed → SubFeed
  7. def clearFilter(breakLineageOnChange: Boolean = true)(implicit session: SparkSession, context: ActionPipelineContext): SparkSubFeed

  8. def clearPartitionValues(breakLineageOnChange: Boolean = true)(implicit session: SparkSession, context: ActionPipelineContext): SparkSubFeed

    Definition Classes
    SparkSubFeed → SubFeed
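
    Both clear methods take a breakLineageOnChange flag. A hedged sketch, with the same implicit assumptions as the breakLineage example above:

        // Drop the filter condition but keep the current DataFrame lineage.
        val noFilter = subFeed.clearFilter(breakLineageOnChange = false)

        // Drop the partition values; with the default breakLineageOnChange = true
        // the lineage is broken, since the DataFrame may no longer match the
        // transported partitions.
        val noPartitions = subFeed.clearPartitionValues()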
  9. def clearSkipped(): SparkSubFeed

    Definition Classes
    SparkSubFeed → SubFeed
  10. def clone(): AnyRef

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  11. val dataFrame: Option[DataFrame]

    Spark DataFrame to be processed. The DataFrame should not be saved to state (@transient).

  12. val dataObjectId: DataObjectId

    Id of the DataObject this SubFeed corresponds to.

    Definition Classes
    SparkSubFeed → SubFeed
  13. final def eq(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  14. val filter: Option[String]

    A Spark SQL filter expression. This is used by SparkIncrementalMode.

  15. def finalize(): Unit

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  16. final def getClass(): Class[_]

    Definition Classes
    AnyRef → Any
  17. def getFilterCol: Option[Column]
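
    Presumably this parses the filter expression into a Spark Column. A hedged sketch of roughly equivalent user code, not the actual implementation:

        import org.apache.spark.sql.functions.expr
        val filterCol = subFeed.filter.map(expr)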
  18. def hasReusableDataFrame: Boolean

  19. val isDAGStart: Boolean

    True if this SubFeed is a start node of the DAG.

    Definition Classes
    SparkSubFeed → SubFeed
  20. val isDummy: Boolean

    True if this SubFeed only contains a dummy DataFrame. Dummy DataFrames can be used to validate the lineage in the init phase, but not in the exec phase.

  21. final def isInstanceOf[T0]: Boolean

    Definition Classes
    Any
  22. val isSkipped: Boolean

    True if this SubFeed is the result of a skipped Action.

    Definition Classes
    SparkSubFeed → SubFeed
  23. def isStreaming: Option[Boolean]

  24. lazy val logger: Logger

    Attributes
    protected
    Definition Classes
    SmartDataLakeLogger
  25. def movePartitionColumnsLast(partitions: Seq[String]): SparkSubFeed

  26. final def ne(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  27. final def notify(): Unit

    Definition Classes
    AnyRef
  28. final def notifyAll(): Unit

    Definition Classes
    AnyRef
  29. val partitionValues: Seq[PartitionValues]

    Values of partitions transported by this SubFeed.

    Definition Classes
    SparkSubFeed → SubFeed
  30. def persist: SparkSubFeed

  31. def resultId: String

    Definition Classes
    SubFeed → DAGResult
  32. final def synchronized[T0](arg0: ⇒ T0): T0

    Definition Classes
    AnyRef
  33. def toOutput(dataObjectId: DataObjectId): SparkSubFeed

    Definition Classes
    SparkSubFeed → SubFeed
  34. def union(other: SubFeed)(implicit session: SparkSession, context: ActionPipelineContext): SubFeed

    Definition Classes
    SparkSubFeed → SubFeed
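
    A hedged sketch, where subFeedA and subFeedB are hypothetical SparkSubFeeds targeting the same DataObject and the usual implicits are in scope:

        // Union the transported data of two SubFeeds; partition values are
        // merged via unionPartitionValues (next entry).
        val combined: SubFeed = subFeedA.union(subFeedB)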
  35. def unionPartitionValues(otherPartitionValues: Seq[PartitionValues]): Seq[PartitionValues]

    Definition Classes
    SubFeed
  36. def updatePartitionValues(partitions: Seq[String], breakLineageOnChange: Boolean = true, newPartitionValues: Option[Seq[PartitionValues]] = None)(implicit session: SparkSession, context: ActionPipelineContext): SparkSubFeed

    Definition Classes
    SparkSubFeed → SubFeed
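
    A hedged sketch with the same implicit assumptions; "dt" is a hypothetical partition column of the target DataObject:

        // Align the transported partition values with the target's partition
        // columns; by default a change also breaks the lineage.
        val updated = subFeed.updatePartitionValues(partitions = Seq("dt"))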
  37. final def wait(): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  38. final def wait(arg0: Long, arg1: Int): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  39. final def wait(arg0: Long): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
