Package it.agilelab.bigdata.wasp.models

package models


Type Members

  1. sealed trait BatchETL extends AnyRef

  2. case class BatchETLModel(name: String, inputs: List[ReaderModel], output: WriterModel, mlModels: List[MlModelOnlyInfo], strategy: Option[StrategyModel], kafkaAccessType: String, group: String = "default", isActive: Boolean = false) extends BatchETL with Product with Serializable

  3. case class BatchGdprETLModel(name: String, dataStores: List[DataStoreConf], strategyConfig: String, inputs: List[ReaderModel], output: WriterModel, group: String = "default", isActive: Boolean = false) extends BatchETL with Product with Serializable

  4. case class BatchJobExclusionConfig(isFullyExclusive: Boolean, restConfigExclusiveParams: Seq[String]) extends Product with Serializable

  5. case class BatchJobInstanceModel(name: String, instanceOf: String, startTimestamp: Long, currentStatusTimestamp: Long, status: JobStatus, restConfig: Config = ConfigFactory.empty, error: Option[String] = None) extends Model with Product with Serializable

  6. trait BatchJobJsonSupport extends DefaultJsonProtocol

  7. case class BatchJobModel(name: String, description: String, owner: String, system: Boolean, creationTime: Long, etl: BatchETL, exclusivityConfig: BatchJobExclusionConfig = ...) extends Model with Product with Serializable

  8. case class BatchSchedulerModel(name: String, cronExpression: String, batchJob: Option[String], options: Option[BsonDocument] = None, isActive: Boolean = true) extends Model with Product with Serializable

  9. case class CdcModel(name: String, uri: String, schema: String, options: CdcOptions = CdcOptions.default) extends DatastoreModel with Product with Serializable

    A named model for mutations coming from a CDC tool. This model should be used together with the Cdc writer plugin in order to write these mutations into a Delta Lake table on HDFS.

    uri is the location on HDFS where the Delta table will be created.

    schema is a json-encoded DataFrame schema, that is, a StructType. See DataType.fromJson and DataType.json.

    options control the underlying spark DeltaLakeWriter in the writers using an instance of this model.

    name: the name of the datastore
    uri: the uri where the data are meant to be written
    schema: the schema of the data
    options: the options for the datastore
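
    For illustration, a minimal construction sketch, assuming Spark's StructType is on the classpath (the model name, uri and columns below are hypothetical):

    import org.apache.spark.sql.types._
    import it.agilelab.bigdata.wasp.models.{CdcModel, CdcOptions}

    // hypothetical mutation schema, json-encoded as expected by CdcModel.schema
    val mutationSchema = StructType(Seq(
      StructField("key", StringType),
      StructField("before", StringType, nullable = true),
      StructField("after", StringType, nullable = true),
      StructField("timestamp", LongType)
    ))

    val cdcModel = CdcModel(
      name = "orders-cdc",                      // hypothetical model name
      uri = "hdfs://namenode/delta/orders",     // HDFS location where the Delta table will be created
      schema = mutationSchema.json,             // json-encoded StructType (DataType.json)
      options = CdcOptions.default
    )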

  10. case class CdcOptions(saveMode: String, format: String = "delta", extraOptions: Option[Map[String, String]] = None, partitionBy: Option[List[String]] = None) extends Product with Serializable

    Options for a CdcModel:

    saveMode specifies the behaviour when saving and the output uri already exists; valid values are:

    • "error", throw an error and do not save anything
    • "overwrite", overwrite existing data
    • "append", append to existing data
    • "ignore", do not save anything and don't throw any errors
    • "default", like "error" for it.agilelab.bigdata.wasp.consumers.SparkWriter, like "append" for it.agilelab.bigdata.wasp.consumers.SparkStreamingWriter

    format specifies the data format to use; valid values are:

    • "delta" (this is the default)
    • "parquet"
    • "orc"
    • "json"
    • any format accepted by the available Spark DataFrameWriters

    extraOptions allows specifying any writer-specific options accepted by DataFrameReader/Writer.option

    partitionBy allows specifying columns to be used to partition the data by using different directories for different values

    saveMode: specifies the behaviour when the output uri exists
    format: specifies the format to use
    extraOptions: extra options for the underlying writer
    partitionBy: columns used to partition the data
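
    As a sketch (the extra option key and partition column below are hypothetical), appending partitioned Delta data could be configured as:

    import it.agilelab.bigdata.wasp.models.CdcOptions

    // append to the existing Delta table, partitioned by an "event_date" column
    val cdcOptions = CdcOptions(
      saveMode = "append",
      format = "delta",
      extraOptions = Some(Map("mergeSchema" -> "true")),  // any DataFrameWriter.option key/value pair
      partitionBy = Some(List("event_date"))
    )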

  11. case class CompletionModel(toComplete: String, info: String) extends Product with Serializable

  12. case class ContainsRawMatchingStrategy(dataframeKeyMatchingExpression: String) extends RawMatchingStrategy with Product with Serializable

  13. case class CountEntry(timestamp: Instant, count: Map[String, Int]) extends Product with Serializable

  14. case class Counts(logs: Seq[CountEntry], telemetry: Seq[CountEntry], events: Seq[CountEntry]) extends Product with Serializable

  15. case class DashboardModel(url: String, needsFilterBox: Boolean) extends Product with Serializable

  16. sealed trait DataStoreConf extends AnyRef

  17. trait DataStoreConfJsonSupport extends DefaultJsonProtocol

  18. abstract class DatastoreModel extends Model

    Base datastore model.

  19. case class DocumentModel(name: String, connectionString: String, schema: String) extends DatastoreModel with Product with Serializable

  20. case class ErrorModel(fileName: String, where: String, errorType: String, msg: String, content: String, indicator: String) extends Product with Serializable

  21. case class EventEntry(eventType: String, eventId: String, severity: String, payload: String, timestamp: Instant, source: String, sourceId: String, eventRuleName: String) extends Product with Serializable

  22. case class Events(found: Long, entries: Seq[EventEntry]) extends Product with Serializable

  23. final case class ExactKeyValueMatchingStrategy() extends KeyValueMatchingStrategy with Product with Serializable

  24. case class ExactRawMatchingStrategy(dataframeKeyMatchingExpression: String) extends RawMatchingStrategy with Product with Serializable

  25. case class FreeCode(code: String) extends Product with Serializable

  26. case class FreeCodeModel(name: String, code: String) extends Model with Product with Serializable

  27. case class GdprStrategyModel(className: String, dataStoresConf: List[DataStoreConf], configuration: Option[String] = None) extends Product with Serializable

  28. case class GenericModel(name: String, value: BsonDocument, product: GenericProduct, options: GenericOptions = GenericOptions.default) extends DatastoreModel with Product with Serializable

  29. case class GenericOptions(options: Option[Map[String, String]] = None) extends Product with Serializable

  30. sealed abstract class HttpCompression extends AnyRef

  31. case class HttpModel(name: String, url: String, method: String, headersFieldName: Option[String], valueFieldsNames: List[String], compression: HttpCompression, mediaType: String, logBody: Boolean, structured: Boolean = true) extends DatastoreModel with Product with Serializable

    The HttpModel used by the HttpWriter to send data over the HTTP protocol.

    name: the name of the HttpModel

    url: the url to send the request to

    method: the HTTP method to use; one of GET, POST, PUT, PATCH, DELETE

    headersFieldName: the name of the DataFrame column to be used as http headers; it must be of type Map[String, String]. If None, no headers will be sent with the request, except for the content-type and content-encoding ones.

    valueFieldsNames: the list of DataFrame columns to be rendered as json in the http request body. If the list is empty, all the fields except the headers field (if any) will be rendered as a json object. If there is only one field, the behaviour is controlled by the structured field.

    compression: the HttpCompression to apply to the request

    mediaType: the format of the request content

    logBody: enables logging of the request body

    structured: indicates how the request body will be rendered. This setting has an effect only if the DataFrame contains a single column to be sent and that column is of ArrayType or MapType. If structured is true the array or map will always be enclosed in a json object, otherwise the map or the array will be at the top level of the json document. Input dataframe:

    +---------+
    |  values |
    +---------+
    |[3, 4, 5]|
    +---------+

    Request with structured = true

    {"values" : [3, 4, 5]}

    Request with structured = false

    [3, 4, 5]
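
    For illustration, a minimal construction sketch; the endpoint, column names and the HttpCompression.Disabled member are assumptions, so check the HttpCompression companion object for the values actually available:

    import it.agilelab.bigdata.wasp.models.{HttpCompression, HttpModel}

    val httpModel = HttpModel(
      name = "orders-webhook",                  // hypothetical model name
      url = "https://example.com/api/orders",   // hypothetical endpoint
      method = "POST",
      headersFieldName = Some("http_headers"),  // column of type Map[String, String]
      valueFieldsNames = List.empty,            // empty list: render all non-header fields as a json object
      compression = HttpCompression.Disabled,   // assumed member of the HttpCompression companion
      mediaType = "application/json",
      logBody = false,
      structured = true
    )
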
  32. case class IndexModel(name: String, creationTime: Long, schema: Option[String], query: Option[String] = None, numShards: Option[Int] = Some(1), replicationFactor: Option[Int] = Some(1), rollingIndex: Boolean = true, idField: Option[String] = None, options: Map[String, String] = Map.empty) extends DatastoreModel with Product with Serializable

  33. class IndexModelBuilder[Stage <: Stage, Kind <: DataStoreKind] extends AnyRef

    A builder able to create instances of IndexModel.

    Stage: the current Stage of the builder.
    Kind: the kind of DataStore whose index is being built.

  34. final case class KeyValueDataStoreConf(inputKeyColumn: String, correlationIdColumn: String, keyValueModel: KeyValueModel, keyValueMatchingStrategy: KeyValueMatchingStrategy) extends DataStoreConf with Product with Serializable

  35. sealed trait KeyValueMatchingStrategy extends AnyRef

  36. case class KeyValueModel(name: String, tableCatalog: String, dataFrameSchema: Option[String], options: Option[Seq[KeyValueOption]], useAvroSchemaManager: Boolean, avroSchemas: Option[Map[String, String]]) extends DatastoreModel with Product with Serializable

  37. case class KeyValueOption(key: String, value: String) extends Product with Serializable

  38. case class LogEntry(log_source: String, log_level: String, message: String, timestamp: Instant, thread: String, cause: Option[String] = None, stacktrace: Option[String] = None) extends Product with Serializable

  39. case class Logs(found: Long, entries: Seq[LogEntry]) extends Product with Serializable

  40. trait Metadata extends AnyRef

  41. case class MetadataModel(id: String, sourceId: String, arrivalTimestamp: Long, lastSeenTimestamp: Long, path: Array[PathModel]) extends Product with Serializable

  42. case class MetricEntry(source: SourceEntry, name: String) extends Product with Serializable

  43. case class Metrics(found: Long, entries: Seq[MetricEntry]) extends Product with Serializable

  44. case class MlModelOnlyInfo(name: String, version: String, className: Option[String] = None, timestamp: Option[Long] = None, modelFileId: Option[BsonObjectId] = None, favorite: Boolean = false, description: String = "") extends Model with Product with Serializable

  45. trait Model extends AnyRef

  46. case class MultiTopicModel extends DatastoreModel with Product with Serializable

    A model for a grouping of topics.

    The name field specifies the name of the model, which is used as the unique identifier for the model in the models database.

    The topicNameField field specifies the field whose contents will be used as the name of the topic to which the message will be sent when writing to Kafka. The field must be of type string. The original field will be left as-is, so your schema must handle it (or you can use valueFieldsNames).

    The topicModelNames field contains the names of the topic models that constitute this grouping of topics.

    The topic models that constitute this grouping of topics must:

    • consist of at least one topic model
    • be all different models
    • refer to different topics
    • use the same settings for everything but partitions and replicas

  47. case class NoPartitionPruningStrategy() extends PartitionPruningStrategy with Product with Serializable

  48. sealed trait PartitionPruningStrategy extends AnyRef

  49. case class PathModel(name: String, ts: Long) extends Product with Serializable

  50. final case class PipegraphInstanceModel(name: String, instanceOf: String, startTimestamp: Long, currentStatusTimestamp: Long, status: PipegraphStatus, executedByNode: Option[String], peerActor: Option[String], error: Option[String] = None) extends Model with Product with Serializable

  51. case class PipegraphModel(name: String, description: String, owner: String, isSystem: Boolean, creationTime: Long, legacyStreamingComponents: List[LegacyStreamingETLModel], structuredStreamingComponents: List[StructuredStreamingETLModel], rtComponents: List[RTModel], dashboard: Option[DashboardModel] = None, labels: Set[String] = Set.empty, enrichmentSources: RestEnrichmentConfigModel = ...) extends Model with Product with Serializable

    A model for a pipegraph, a processing pipeline abstraction.

    name: name of the pipegraph
    description: description of the pipegraph
    owner: owner of the pipegraph
    isSystem: whether the pipegraph is from the WASP system
    creationTime: time of creation of the pipegraph
    legacyStreamingComponents: components describing processing built on Spark Legacy Streaming
    structuredStreamingComponents: components describing processing built on Spark Structured Streaming
    rtComponents: components describing processing built on Akka actors
    dashboard: dashboard of the pipegraph

  52. final case class PrefixAndTimeBoundKeyValueMatchingStrategy(separator: String, pattern: String, locale: String = "UTC") extends KeyValueMatchingStrategy with Product with Serializable

  53. final case class PrefixKeyValueMatchingStrategy() extends KeyValueMatchingStrategy with Product with Serializable

  54. case class PrefixRawMatchingStrategy(dataframeKeyMatchingExpression: String) extends RawMatchingStrategy with Product with Serializable

  55. case class ProcessGroupModel(name: String, content: BsonDocument, errorPort: String) extends Model with Product with Serializable

  56. trait ProcessingComponentModel extends AnyRef

  57. case class ProducerModel(name: String, className: String, topicName: Option[String], isActive: Boolean = false, configuration: Option[String] = None, isRemote: Boolean, isSystem: Boolean) extends Model with Product with Serializable

    DataSource class. The fields must be the same as the ones inside the MongoDB document associated with this model.

  58. case class RTModel(name: String, inputs: List[ReaderModel], isActive: Boolean = false, strategy: Option[StrategyModel] = None, endpoint: Option[WriterModel] = None) extends ProcessingComponentModel with Product with Serializable

  59. final case class RawDataStoreConf(inputKeyColumn: String, correlationIdColumn: String, rawModel: RawModel, rawMatchingStrategy: RawMatchingStrategy, partitionPruningStrategy: PartitionPruningStrategy, missingPathFailure: Boolean = false) extends DataStoreConf with Product with Serializable

  60. sealed trait RawMatchingStrategy extends AnyRef

  61. case class RawModel(name: String, uri: String, timed: Boolean = true, schema: String, options: RawOptions = RawOptions.default) extends DatastoreModel with Product with Serializable

    A named model for data stored as files on a raw datastore (eg HDFS).

    The uri is augmented with time information if timed is true. For writers this means whether to use uri as-is or create timed namespaces (eg for HDFS, a subdirectory) inside; for readers, whether to read from uri as-is or from the most recent timed namespace inside.

    schema is a json-encoded DataFrame schema, that is, a StructType. See DataType.fromJson and DataType.json.

    options control the underlying spark DataFrameWriter/Reader in the writers/readers using an instance of this model.

    name: the name of the datastore
    uri: the uri where the data files reside
    timed: whether the uri must be augmented with time information
    schema: the schema of the data
    options: the options for the datastore
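
    A minimal construction sketch, assuming Spark's StructType is on the classpath (the model name, uri and columns below are hypothetical):

    import org.apache.spark.sql.types._
    import it.agilelab.bigdata.wasp.models.{RawModel, RawOptions}

    // hypothetical schema of the files to be written/read
    val fileSchema = StructType(Seq(
      StructField("id", StringType),
      StructField("amount", DoubleType)
    ))

    val rawModel = RawModel(
      name = "raw-orders",                      // hypothetical model name
      uri = "hdfs://namenode/data/raw/orders",  // hypothetical base uri; timed = true adds a timed namespace
      timed = true,
      schema = fileSchema.json,                 // json-encoded StructType (DataType.json)
      options = RawOptions.default
    )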

  62. case class RawOptions(saveMode: String, format: String, extraOptions: Option[Map[String, String]] = None, partitionBy: Option[List[String]] = None) extends Product with Serializable

    Options for a raw datastore.

    saveMode specifies the behaviour when saving and the output uri already exists; valid values are:

    • "error", throw an error and do not save anything
    • "overwrite", overwrite existing data
    • "append", append to existing data
    • "ignore", do not save anything and don't throw any errors
    • "default", like "error" for it.agilelab.bigdata.wasp.consumers.SparkWriter, like "append" for it.agilelab.bigdata.wasp.consumers.SparkStreamingWriter

    format specifies the data format to use; valid values are:

    • "parquet" (this is the default)
    • "orc"
    • "json"
    • any format accepted by the available Spark DataFrameWriters

    extraOptions allows specifying any writer-specific options accepted by DataFrameReader/Writer.option

    partitionBy allows specifying columns to be used to partition the data by using different directories for different values

    saveMode: specifies the behaviour when the output uri exists
    format: specifies the format to use
    extraOptions: extra options for the underlying writer
    partitionBy: columns used to partition the data
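
    As a sketch (the extra option key and partition column below are hypothetical), overwriting partitioned parquet data could be configured as:

    import it.agilelab.bigdata.wasp.models.RawOptions

    // overwrite existing data as parquet, partitioned by a "year" column
    val rawOptions = RawOptions(
      saveMode = "overwrite",
      format = "parquet",
      extraOptions = Some(Map("compression" -> "snappy")),  // any DataFrameWriter.option key/value pair
      partitionBy = Some(List("year"))
    )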

  63. case class ReaderModel extends Product with Serializable

    A model for a reader, composed of a name, a datastoreModelName defining the datastore, a datastoreProduct defining the datastore software product to use, and any additional options needed to configure the reader.

  64. case class SourceEntry(name: String) extends Product with Serializable

  65. case class Sources(found: Long, entries: Seq[SourceEntry]) extends Product with Serializable

  66. case class SqlSourceModel(name: String, connectionName: String, dbtable: String, partitioningInfo: Option[JdbcPartitioningInfo], numPartitions: Option[Int], fetchSize: Option[Int]) extends DatastoreModel with Product with Serializable

    Class representing a SqlSource model.

    name: the name of the SqlSource model
    connectionName: the name of the connection to use. N.B. it must be present in jdbc-subConfig
    dbtable: the name of the table
    partitioningInfo: optional - partition info (column, lowerBound, upperBound)
    numPartitions: optional - number of partitions
    fetchSize: optional - fetch size
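
    A construction sketch (the connection name and table below are hypothetical; the connection has to be defined in jdbc-subConfig):

    import it.agilelab.bigdata.wasp.models.SqlSourceModel

    val sqlSource = SqlSourceModel(
      name = "orders-sql-source",      // hypothetical model name
      connectionName = "warehouse",    // must match a connection defined in jdbc-subConfig
      dbtable = "orders",
      partitioningInfo = None,         // optionally a JdbcPartitioningInfo (column, lowerBound, upperBound)
      numPartitions = Some(4),
      fetchSize = Some(1000)
    )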

  67. case class StrategyModel(className: String, configuration: Option[String] = None) extends Product with Serializable

  68. case class StreamingReaderModel extends Product with Serializable

  69. case class StructuredStreamingETLModel(name: String, group: String = "default", streamingInput: StreamingReaderModel, staticInputs: List[ReaderModel], streamingOutput: WriterModel, mlModels: List[MlModelOnlyInfo], strategy: Option[StrategyModel], triggerIntervalMs: Option[Long], options: Map[String, String] = Map.empty) extends ProcessingComponentModel with Product with Serializable

    A streaming processing component that leverages Spark's Structured Streaming API.

    name: unique name of the processing component
    group: group of which the processing component is part
    staticInputs: list of inputs for static datasets
    streamingOutput: streaming output
    mlModels: machine learning models to be used in the processing
    strategy: strategy model that defines the processing
    triggerIntervalMs: trigger interval to use, in milliseconds
    options: has no effect at all

  70. sealed trait SubjectStrategy extends AnyRef

  71. case class TelemetryEntry(source: SourceEntry, metric: MetricEntry, messageId: String, value: Long, timestamp: Instant) extends Product with Serializable

  72. case class TelemetryPoint(timestamp: Instant, value: Double) extends Product with Serializable

  73. case class TelemetrySeries(source: SourceEntry, metric: MetricEntry, series: Seq[TelemetryPoint]) extends Product with Serializable

  74. final case class TimeBasedBetweenPartitionPruningStrategy(columnName: String, isDateNumeric: Boolean, pattern: String, granularity: String) extends PartitionPruningStrategy with Product with Serializable

  75. sealed abstract class TopicCompression extends AnyRef

  76. case class TopicModel(name: String, creationTime: Long, partitions: Int, replicas: Int, topicDataType: String, keyFieldName: Option[String], headersFieldName: Option[String], valueFieldsNames: Option[Seq[String]], useAvroSchemaManager: Boolean, schema: BsonDocument, topicCompression: TopicCompression = TopicCompression.Disabled, subjectStrategy: SubjectStrategy = SubjectStrategy.None, keySchema: Option[String] = None) extends DatastoreModel with Product with Serializable

    A model for a topic, that is, a message queue of some sort. Right now this means just Kafka topics.

    name: the name of the topic; doubles as the unique identifier for the model in the models database

    creationTime: marks the time at which the model was generated

    partitions: the number of partitions used for the topic when wasp creates it

    replicas: the number of replicas used for the topic when wasp creates it

    topicDataType: specifies the format to use when encoding/decoding data to/from messages; allowed values are: avro, plaintext, json, binary

    keyFieldName: optionally specifies a field whose contents will be used as a message key when writing to Kafka. The field must be of type string or binary. The original field will be left as-is, so your schema must handle it (or you can use valueFieldsNames).

    headersFieldName: optionally specifies a field whose contents will be used as message headers when writing to Kafka. The field must contain an array of non-null objects which must have a non-null field headerKey of type string and a field headerValue of type binary. The original field will be left as-is, so your schema must handle it (or you can use valueFieldsNames).

    valueFieldsNames: allows you to specify a list of field names used to filter the fields that get passed to the value encoding; with this you can filter out fields that you don't need in the value, obviating the need to handle them in the schema. This is especially useful when specifying keyFieldName or headersFieldName. For the avro and json topic data types this is optional; for the plaintext and binary topic data types this field is mandatory and the list must contain a single value field name of the proper type (string for plaintext and binary for binary).

    useAvroSchemaManager: whether a schema registry should be used to handle schema evolution (it makes sense only for the avro message datatype)

    schema: the Avro schema to use when encoding the value; for plaintext and binary this field is ignored. For json and avro the field names need to match 1:1 with the valueFieldsNames or the schema output of the strategy.

    topicCompression: the compression to use for messages

    subjectStrategy: the subject strategy to use when registering the schema to the schema registry, for the schema registry implementations that support it. This property makes sense only for avro and only if useAvroSchemaManager is set to true.

    keySchema: the schema to be used to encode the key as avro
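
    A minimal construction sketch for an avro topic (the topic name, schema and columns are hypothetical; org.bson.BsonDocument is assumed for the schema field):

    import org.bson.BsonDocument
    import it.agilelab.bigdata.wasp.models.TopicModel

    // hypothetical Avro value schema, parsed into the BsonDocument expected by TopicModel.schema
    val avroSchema = BsonDocument.parse(
      """{"type":"record","name":"Order","namespace":"example",
        |"fields":[{"name":"id","type":"string"},{"name":"amount","type":"double"}]}""".stripMargin
    )

    val topicModel = TopicModel(
      name = "orders.topic",            // hypothetical topic name
      creationTime = System.currentTimeMillis(),
      partitions = 3,
      replicas = 2,
      topicDataType = "avro",           // avro, plaintext, json or binary
      keyFieldName = Some("id"),        // string field used as the Kafka message key
      headersFieldName = None,
      valueFieldsNames = Some(Seq("id", "amount")),
      useAvroSchemaManager = false,
      schema = avroSchema               // topicCompression, subjectStrategy and keySchema keep their defaults
    )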

  77. case class WebMailModel(name: String) extends DatastoreModel with Product with Serializable

  78. case class WebsocketModel(name: String, host: String, port: String, resourceName: String, options: Option[BsonDocument] = None) extends DatastoreModel with Product with Serializable

  79. case class WriterModel extends Product with Serializable

    A model for a writer, composed of a name, a datastoreModelName defining the datastore, a datastoreProduct defining the datastore software product to use, and any additional options needed to configure the writer.

  80. case class LegacyStreamingETLModel(name: String, inputs: List[ReaderModel], output: WriterModel, mlModels: List[MlModelOnlyInfo], strategy: Option[StrategyModel], kafkaAccessType: String, group: String = "default", isActive: Boolean = false) extends ProcessingComponentModel with Product with Serializable

    Annotations: @deprecated (Since version 2.8.0)

Value Members

  1. object Aggregate extends Enumeration

  2. object BatchETLModel extends Serializable

  3. object BatchGdprETLModel extends Serializable

  4. object CdcOptions extends Serializable

  5. object ContainsRawMatchingStrategy extends Serializable

  6. object ExactKeyValueMatchingStrategy extends Serializable

  7. object ExactRawMatchingStrategy extends Serializable

  8. object GdprStrategyModel extends Serializable

  9. object GenericCdcMutationFields

    Object that represents all the fields used to describe a generic mutation inside the cdcPlugin. It has been placed here because all the CDC adapters (like Debezium, GoldenGate, etc.) need to know how to map the fields into a compliant dataframe.

  10. object GenericOptions extends Serializable

  11. object HttpCompression

  12. object IndexModelBuilder

    Companion object of IndexModelBuilder, contains the syntax.

    import IndexModelBuilder._ when you want to construct an IndexModel.

  13. object IndexType extends Enumeration

  14. object JobStatus extends Enumeration

  15. object KeyValueDataStoreConf extends Serializable

  16. object KeyValueModel extends Serializable

  17. object LegacyStreamingETLModel extends Serializable

  18. object MultiTopicModel extends Serializable

  19. object NoPartitionPruningStrategy extends Serializable

  20. object PipegraphStatus extends Enumeration

  21. object PrefixAndTimeBoundKeyValueMatchingStrategy extends Serializable

  22. object PrefixKeyValueMatchingStrategy extends Serializable

  23. object PrefixRawMatchingStrategy extends Serializable

  24. object RawDataStoreConf extends Serializable

  25. object RawModel extends Serializable

  26. object RawOptions extends Serializable

  27. object ReaderModel extends Serializable

  28. object SpraySolrProtocol extends DefaultJsonProtocol

  29. object StrategyModel extends Serializable

  30. object StreamingReaderModel extends Serializable

  31. object SubjectStrategy

  32. object TimeBasedBetweenPartitionPruningStrategy extends Serializable

  33. object TopicCompression

  34. object TopicDataTypes

  35. object TopicModel extends Serializable

  36. object WriterModel extends Serializable

  37. package builder

  38. package configuration

  39. package editor

