package spark
Type Members
-
class
Analyzer extends AnyRef
-
class
Args extends ScallopConf
-
abstract
class
BaseJoin extends AnyRef
-
case class
BootstrapInfo(joinConf: api.Join, joinParts: Seq[JoinPartMetadata], externalParts: Seq[ExternalPartMetadata], derivations: Array[StructField], hashToSchema: Map[String, Array[StructField]]) extends Product with Serializable
-
-
-
sealed
trait
DataRange extends AnyRef
-
class
DummyExtensions extends (SparkSessionExtensions) ⇒ Unit
-
case class
ExternalPartMetadata(externalPart: ExternalPart, keySchema: Array[StructField], valueSchema: Array[StructField]) extends Product with Serializable
-
class
GroupBy extends Serializable
-
class
GroupByUpload extends Serializable
-
sealed
case class
IncompatibleSchemaException(inconsistencies: Seq[(String, DataType, DataType)]) extends Exception with Product with Serializable
-
-
-
-
case class
JoinPartMetadata(joinPart: JoinPart, keySchema: Array[StructField], valueSchema: Array[StructField]) extends Product with Serializable
-
case class
KeyWithHash(data: Array[Any], hash: Array[Byte], hashInt: Int) extends Serializable with Product
-
case class
KvRdd(data: RDD[(Array[Any], Array[Any])], keySchema: StructType, valueSchema: StructType)(implicit sparkSession: SparkSession) extends Product with Serializable
-
class
LabelJoin extends AnyRef
-
-
case class
LoggingSchema(keyCodec: AvroCodec, valueCodec: AvroCodec) extends Product with Serializable
-
case class
PartitionRange(start: String, end: String) extends DataRange with Ordered[PartitionRange] with Product with Serializable
-
-
case class
TableUtils(sparkSession: SparkSession) extends Product with Serializable
-
case class
TimeRange(start: Long, end: Long) extends DataRange with Product with Serializable
The purpose of LogFlattenerJob is to unpack serialized Avro data from online requests, flatten each field (both keys and values) into individual columns, and save the result to an offline "flattened" log table.
Steps:
1. Determine the unfilled range and pull raw logs from the partitioned log table.
2. Fetch joinCodecs for all unique schema_hash values present in the logs.
3. Build a merged schema from all schema versions, which will be used as the output schema.
4. Unpack each row so that it adheres to the output schema.
5. Save the schema info in the flattened log table properties (cumulatively).