package spark
Type Members
-
class
Analyzer extends AnyRef
-
class
Args extends ScallopConf
-
abstract
class
BaseJoin extends AnyRef
-
case class
BootstrapInfo(joinConf: api.Join, joinParts: Seq[JoinPartMetadata], externalParts: Seq[ExternalPartMetadata], derivations: Array[StructField], hashToSchema: Map[String, Array[StructField]]) extends Product with Serializable
-
-
-
sealed
trait
DataRange extends AnyRef
-
class
DummyExtensions extends (SparkSessionExtensions) ⇒ Unit
-
case class
ExternalPartMetadata(externalPart: ExternalPart, keySchema: Array[StructField], valueSchema: Array[StructField]) extends Product with Serializable
-
class
GroupBy extends Serializable
-
class
GroupByUpload extends Serializable
-
sealed
case class
IncompatibleSchemaException(inconsistencies: Seq[(String, DataType, DataType)]) extends Exception with Product with Serializable
-
-
-
-
case class
JoinPartMetadata(joinPart: JoinPart, keySchema: Array[StructField], valueSchema: Array[StructField]) extends Product with Serializable
-
case class
KeyWithHash(data: Array[Any], hash: Array[Byte], hashInt: Int) extends Serializable with Product
-
case class
KvRdd(data: RDD[(Array[Any], Array[Any])], keySchema: StructType, valueSchema: StructType)(implicit sparkSession: SparkSession) extends Product with Serializable
-
class
LabelJoin extends AnyRef
-
-
case class
LoggingSchema(keyCodec: AvroCodec, valueCodec: AvroCodec) extends Product with Serializable
-
case class
PartitionRange(start: String, end: String) extends DataRange with Ordered[PartitionRange] with Product with Serializable
-
-
case class
TableUtils(sparkSession: SparkSession) extends Product with Serializable
-
case class
TimeRange(start: Long, end: Long) extends DataRange with Product with Serializable
The purpose of LogFlattenerJob is to unpack serialized Avro data from online requests, flatten each field (both keys and values) into individual columns, and save the result to an offline "flattened" log table.
Steps:
1. Determine the unfilled range and pull raw logs from the partitioned log table.
2. Fetch joinCodecs for all unique schema_hash values present in the logs.
3. Build a merged schema from all schema versions, which will be used as the output schema.
4. Unpack each row so that it adheres to the output schema.
5. Save the schema info in the flattened log table properties (cumulatively).