doric

package doric

Linear Supertypes

All, SortingOps, CollectOps, JoinOps, TransformOps, AggregationOps, RelationalGroupedDatasetDoricInterface, All, DStructs3x, AggregationColumns32, StringColumn3x, MapColumns3x, CommonColumns3x, ArrayColumns3x, BinaryColumns32, StringColumns31, NumericColumns32, NumericColumns31, BooleanColumns31, AggregationColumns31, Interpolators, BinaryColumns, doric.syntax.CNameOps, AggregationColumns, ControlStructures, StringColumns, BooleanColumns, TimestampColumns, DateColumns, NumericColumns, MapColumns, LiteralConversions, DStructs, CommonColumns, ColGetters[NamedDoricColumn], TypeMatcher, ArrayColumns, AnyRef, Any

Ordering

Grouped
Alphabetic
By Inheritance

Inherited

doric
All
SortingOps
CollectOps
JoinOps
TransformOps
AggregationOps
RelationalGroupedDatasetDoricInterface
All
DStructs3x
AggregationColumns32
StringColumn3x
MapColumns3x
CommonColumns3x
ArrayColumns3x
BinaryColumns32
StringColumns31
NumericColumns32
NumericColumns31
BooleanColumns31
AggregationColumns31
Interpolators
BinaryColumns
CNameOps
AggregationColumns
ControlStructures
StringColumns
BooleanColumns
TimestampColumns
DateColumns
NumericColumns
MapColumns
LiteralConversions
DStructs
CommonColumns
ColGetters
TypeMatcher
ArrayColumns
AnyRef
Any

Hide All
Show All

Visibility

Public
Protected

Package Members

package sem
package sqlExpressions
package syntax
package types

Type Members

type ArrayColumn[A] = DoricColumn[Array[A]]
type BinaryColumn = DoricColumn[Array[Byte]]
type BooleanColumn = DoricColumn[Boolean]
type ByteColumn = DoricColumn[Byte]
case class CName(value: String) extends Product with Serializable
case class CNameOrd(name: CName, order: Order) extends Product with Serializable
implicit class CollectSyntax[A] extends AnyRef
Definition Classes
CollectOps
implicit class DStructOps3x[T] extends AnyRef
Definition Classes
DStructs3x
implicit class DataframeAggSyntax extends AnyRef
Definition Classes
AggregationOps
implicit class DataframeSortSyntax extends AnyRef
Definition Classes
SortingOps
implicit class DataframeTransformationSyntax[A] extends AnyRef
Definition Classes
TransformOps
type DateColumn = DoricColumn[Date]
type Doric[T] = Kleisli[DoricValidated, Dataset[_], T]
sealed trait DoricColumn[T] extends AnyRef
type DoricJoin[T] = Kleisli[DoricValidated, (Dataset[_], Dataset[_]), T]
case class DoricJoinColumn(elem: DoricJoin[Column]) extends Product with Serializable
type DoricValidated[T] = Validated[NonEmptyChain[DoricSingleError], T]
type DoubleColumn = DoricColumn[Double]
type FloatColumn = DoricColumn[Float]
type InstantColumn = DoricColumn[Instant]
type IntegerColumn = DoricColumn[Int]
sealed abstract class JoinSideDoricColumn[T] extends AnyRef
case class LeftDoricColumn[T](elem: Doric[Column]) extends JoinSideDoricColumn[T] with Product with Serializable
case class LiteralDoricColumn[T] extends DoricColumn[T] with Product with Serializable
type LocalDateColumn = DoricColumn[LocalDate]
type LongColumn = DoricColumn[Long]
type MapColumn[K, V] = DoricColumn[Map[K, V]]
case class NamedDoricColumn[T] extends DoricColumn[T] with Product with Serializable
type NullColumn = DoricColumn[Null]
sealed trait Order extends AnyRef
implicit class RelationalGroupedDatasetSem extends AnyRef
Definition Classes
AggregationOps
case class RightDoricColumn[T](elem: Doric[Column]) extends JoinSideDoricColumn[T] with Product with Serializable
type RowColumn = DoricColumn[Row]
type StringColumn = DoricColumn[String]
implicit final class StringIntCNameOps extends AnyVal
type TimestampColumn = DoricColumn[Timestamp]
case class TransformationDoricColumn[T] extends DoricColumn[T] with Product with Serializable
implicit class DataframeJoinSyntax[A] extends AnyRef
Definition Classes
JoinOps
implicit class ArrayArrayColumnSyntax[G[_], F[_], T] extends AnyRef
Definition Classes
ArrayColumns
implicit class ArrayColumnSyntax[T, F[_]] extends AnyRef
Extension methods for arrays
Extension methods for arrays
Definition Classes
ArrayColumns
implicit class ArrayColumnTupleSyntax[K, V, F[_]] extends AnyRef
Extension methods for arrays
Extension methods for arrays
Definition Classes
ArrayColumns
implicit class ArrayColumnSyntax3x[T, F[_]] extends AnyRef
Definition Classes
ArrayColumns3x
implicit class ArrayStructColumnSyntax3x[F[_]] extends AnyRef
Definition Classes
ArrayColumns3x
implicit class BinaryOperationsSyntax[T] extends AnyRef
Definition Classes
BinaryColumns
implicit class BinaryOperationsSyntax32[T] extends AnyRef
Definition Classes
BinaryColumns32
implicit class BooleanOperationsSyntax extends AnyRef
Definition Classes
BooleanColumns
implicit class BooleanOperationsSyntax31 extends AnyRef
Definition Classes
BooleanColumns31
implicit class CNameOps extends AnyRef
Definition Classes
CNameOps
implicit class StringCNameOps extends AnyRef
Definition Classes
CNameOps
implicit class BasicCol[T] extends AnyRef
Extension methods for any kind of column
Extension methods for any kind of column
Definition Classes
CommonColumns
implicit class CastingImpl[T] extends AnyRef
Casting methods
Casting methods
Definition Classes
CommonColumns
implicit class SparkCol extends AnyRef
Definition Classes
CommonColumns
implicit class ControlStructuresImpl[O] extends AnyRef
Definition Classes
ControlStructures
implicit class DStructOps[T] extends AnyRef
Definition Classes
DStructs
class DynamicFieldAccessor[T] extends Dynamic
Definition Classes
DStructs
trait SelectorLPI extends AnyRef
Definition Classes
DStructs
trait SelectorWithSparkType[L <: HList, K <: Symbol] extends AnyRef
Definition Classes
DStructs
Annotations
@implicitNotFound()
implicit class StructOps[T, L <: HList] extends AnyRef
Definition Classes
DStructs
implicit class DateColumnLikeSyntax[T] extends AnyRef
Definition Classes
DateColumns
implicit class doricStringInterpolator extends AnyRef
Definition Classes
Interpolators
implicit class DoricColLiteralGetter[T] extends AnyRef
Definition Classes
LiteralConversions
implicit class LiteralOps[L] extends AnyRef
Definition Classes
LiteralConversions
implicit class MapColumnOps[K, V] extends AnyRef
Extension methods for Map Columns
Extension methods for Map Columns
Definition Classes
MapColumns
implicit class MapColumnOps3x[K, V] extends AnyRef
Extension methods for Map Columns
Extension methods for Map Columns
Definition Classes
MapColumns3x
implicit class IntegralOperationsSyntax[T] extends AnyRef
INTEGRAL OPERATIONS
INTEGRAL OPERATIONS
Definition Classes
NumericColumns
implicit class LongOperationsSyntax extends AnyRef
LONG OPERATIONS
LONG OPERATIONS
Definition Classes
NumericColumns
implicit class NumWithDecimalsOperationsSyntax[T] extends AnyRef
NUM WITH DECIMALS OPERATIONS
NUM WITH DECIMALS OPERATIONS
Definition Classes
NumericColumns
implicit class NumericOperationsSyntax[T] extends AnyRef
GENERIC NUMERIC OPERATIONS
GENERIC NUMERIC OPERATIONS
Definition Classes
NumericColumns
implicit class NumWithDecimalsOperationsSyntax31[T] extends AnyRef
NUM WITH DECIMALS OPERATIONS
NUM WITH DECIMALS OPERATIONS
Definition Classes
NumericColumns31
implicit class NumericOperationsSyntax31[T] extends AnyRef
Definition Classes
NumericColumns31
implicit class IntegralOperationsSyntax32[T] extends AnyRef
INTEGRAL OPERATIONS
INTEGRAL OPERATIONS
Definition Classes
NumericColumns32
implicit class StringOperationsSyntax3x extends AnyRef
Definition Classes
StringColumn3x
implicit class StringOperationsSyntax extends AnyRef
Unique column operations
Unique column operations
Definition Classes
StringColumns
implicit class StringOperationsSyntax31 extends AnyRef
Definition Classes
StringColumns31
implicit class TimestampColumnLikeSyntax[T] extends AnyRef
Definition Classes
TimestampColumns

Abstract Value Members

abstract def constructSide[T](column: Doric[Column], colName: String): NamedDoricColumn[T]
Attributes
protected
Definition Classes
ColGetters
Annotations
@inline()

Concrete Value Members

def andAgg(col: BooleanColumn): BooleanColumn
Aggregate function: returns the AND value for a boolean column
Aggregate function: returns the AND value for a boolean column
Definition Classes
AggregationColumns
def aproxCountDistinct(colName: String): LongColumn
Aggregate function: returns the approximate number of distinct items in a group.
Aggregate function: returns the approximate number of distinct items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.approx_count_distinct
def aproxCountDistinct(colName: String, rsd: Double): LongColumn
Aggregate function: returns the approximate number of distinct items in a group.
Aggregate function: returns the approximate number of distinct items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.approx_count_distinct
def aproxCountDistinct(col: DoricColumn[_]): LongColumn
Aggregate function: returns the approximate number of distinct items in a group.
Aggregate function: returns the approximate number of distinct items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.approx_count_distinct
def aproxCountDistinct(col: DoricColumn[_], rsd: Double): LongColumn
Aggregate function: returns the approximate number of distinct items in a group.
Aggregate function: returns the approximate number of distinct items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.approx_count_distinct
def array[T](cols: DoricColumn[T]*)(implicit arg0: SparkType[T], arg1: ClassTag[T], lt: LiteralSparkType[Array[T]]): ArrayColumn[T]
Creates a new array column.
Creates a new array column. The input columns must all have the same data type.
Definition Classes
ArrayColumns
To do
scaladoc link (issue #135)
See also
org.apache.spark.sql.functions.array
def avg[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: returns the average of the values in a group.
Aggregate function: returns the average of the values in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.avg
def coalesce[T](cols: DoricColumn[T]*): DoricColumn[T]
Returns the first column that is not null, or null if all inputs are null.
Returns the first column that is not null, or null if all inputs are null.
For example, coalesce(a, b, c) will return a if a is not null, or b if a is null and b is not null, or c if both a and b are null but c is not null.
cols
the DoricColumns to coalesce
returns
the first column that is not null, or null if all inputs are null.
Definition Classes
CommonColumns
See also
org.apache.spark.sql.functions.coalesce
def col[T](colName: String)(implicit arg0: SparkType[T], location: Location): NamedDoricColumn[T]
Retrieves a column with the provided name and the provided type.
Retrieves a column with the provided name and the provided type.
T
the expected type of the column
colName
the name of the column to find.
location
error location.
returns
the column reference
Definition Classes
ColGetters
def colArray[T](colName: String)(implicit arg0: ClassTag[T], location: Location, st: SparkType[Array[T]]): NamedDoricColumn[Array[T]]
Retrieves a column with the provided name expecting it to be of array of T type.
Retrieves a column with the provided name expecting it to be of array of T type.
T
the type of the elements of the array.
colName
the name of the column to find.
location
error location.
returns
the array of T column reference.
Definition Classes
ColGetters
def colArrayInt(colName: String)(implicit location: Location): NamedDoricColumn[Array[Int]]
Retrieves a column with the provided name expecting it to be of array of integers type.
Retrieves a column with the provided name expecting it to be of array of integers type.
colName
the name of the column to find.
location
error location.
returns
the array of integers column reference.
Definition Classes
ColGetters
def colArrayString(colName: String)(implicit location: Location): NamedDoricColumn[Array[String]]
Retrieves a column with the provided name expecting it to be of array of string type.
Retrieves a column with the provided name expecting it to be of array of string type.
colName
the name of the column to find.
location
error location.
returns
the array of string column reference.
Definition Classes
ColGetters
def colBinary(colName: String)(implicit location: Location): NamedDoricColumn[Array[Byte]]
Retrieves a column with the provided name expecting it to be of array of bytes type.
Retrieves a column with the provided name expecting it to be of array of bytes type.
colName
the name of the column to find.
location
error location.
returns
the binary column reference.
Definition Classes
ColGetters
def colBoolean(colName: String)(implicit location: Location): NamedDoricColumn[Boolean]
Retrieves a column with the provided name expecting it to be of boolean type.
Retrieves a column with the provided name expecting it to be of boolean type.
colName
the name of the column to find.
location
error location.
returns
the boolean column reference
Definition Classes
ColGetters
def colDate(colName: String)(implicit location: Location): NamedDoricColumn[Date]
Retrieves a column with the provided name expecting it to be of Date type.
Retrieves a column with the provided name expecting it to be of Date type.
colName
the name of the column to find.
location
error location.
returns
the Date column reference
Definition Classes
ColGetters
def colDouble(colName: String)(implicit location: Location): NamedDoricColumn[Double]
Retrieves a column with the provided name expecting it to be of double type.
Retrieves a column with the provided name expecting it to be of double type.
colName
the name of the column to find.
location
error location.
returns
the double column reference
Definition Classes
ColGetters
def colFloat(colName: String)(implicit location: Location): NamedDoricColumn[Float]
Retrieves a column with the provided name expecting it to be of float type.
Retrieves a column with the provided name expecting it to be of float type.
colName
the name of the column to find.
location
error location.
returns
the float column reference
Definition Classes
ColGetters
def colFromDF[T](colName: String, originDF: Dataset[_])(implicit arg0: SparkType[T], location: Location): NamedDoricColumn[T]
Retrieves a column of the provided dataframe.
Retrieves a column of the provided dataframe. Useful to prevent column ambiguity errors.
T
the type of the doric column.
colName
the name of the column to find.
originDF
the dataframe from which the column must be taken.
location
error location.
returns
the column of type T column reference.
Definition Classes
ColGetters
def colInstant(colName: String)(implicit location: Location): NamedDoricColumn[Instant]
Retrieves a column with the provided name expecting it to be of instant type.
Retrieves a column with the provided name expecting it to be of instant type.
colName
the name of the column to find.
location
error location.
returns
the instant column reference
Definition Classes
ColGetters
def colInt(colName: String)(implicit location: Location): NamedDoricColumn[Int]
Retrieves a column with the provided name expecting it to be of integer type.
Retrieves a column with the provided name expecting it to be of integer type.
colName
the name of the column to find.
location
error location.
returns
the integer column reference
Definition Classes
ColGetters
def colLocalDate(colName: String)(implicit location: Location): NamedDoricColumn[LocalDate]
Retrieves a column with the provided name expecting it to be of LocalDate type.
Retrieves a column with the provided name expecting it to be of LocalDate type.
colName
the name of the column to find.
location
error location.
returns
the LocalDate column reference
Definition Classes
ColGetters
def colLong(colName: String)(implicit location: Location): NamedDoricColumn[Long]
Retrieves a column with the provided name expecting it to be of long type.
Retrieves a column with the provided name expecting it to be of long type.
colName
the name of the column to find.
location
error location.
returns
the long column reference
Definition Classes
ColGetters
def colMap[K, V](colName: String)(implicit arg0: SparkType[K], arg1: SparkType[V], location: Location): NamedDoricColumn[Map[K, V]]
Retrieves a column with the provided name expecting it to be of map type.
Retrieves a column with the provided name expecting it to be of map type.
K
the type of the keys of the map.
V
the type of the values of the map.
colName
the name of the column to find.
location
error location.
returns
the map column reference.
Definition Classes
ColGetters
def colMapString[V](colName: String)(implicit arg0: SparkType[V], location: Location): NamedDoricColumn[Map[String, V]]
Retrieves a column with the provided name expecting it to be of map type.
Retrieves a column with the provided name expecting it to be of map type.
V
the type of the values of the map.
colName
the name of the column to find.
location
error location.
returns
the map column reference.
Definition Classes
ColGetters
def colNull(colName: String)(implicit location: Location): NamedDoricColumn[Null]
Retrieves a column with the provided name expecting it to be of null type.
Retrieves a column with the provided name expecting it to be of null type.
colName
the name of the column to find.
location
error location.
returns
the null column reference
Definition Classes
ColGetters
def colString(colName: String)(implicit location: Location): NamedDoricColumn[String]
Retrieves a column with the provided name expecting it to be of string type.
Retrieves a column with the provided name expecting it to be of string type.
colName
the name of the column to find.
location
error location.
returns
the string column reference
Definition Classes
ColGetters
def colStruct(colName: String)(implicit location: Location): NamedDoricColumn[Row]
Retrieves a column with the provided name expecting it to be of struct type.
Retrieves a column with the provided name expecting it to be of struct type.
colName
the name of the column to find.
location
error location.
returns
the struct column reference.
Definition Classes
ColGetters
def colTimestamp(colName: String)(implicit location: Location): NamedDoricColumn[Timestamp]
Retrieves a column with the provided name expecting it to be of Timestamp type.
Retrieves a column with the provided name expecting it to be of Timestamp type.
colName
the name of the column to find.
location
error location.
returns
the Timestamp column reference
Definition Classes
ColGetters
def collectList[T](col: DoricColumn[T]): ArrayColumn[T]
Aggregate function: returns a list of objects with duplicates.
Aggregate function: returns a list of objects with duplicates.
Definition Classes
AggregationColumns
Note
The function is non-deterministic because the order of collected results depends on the order of the rows which may be non-deterministic after a shuffle.
See also
org.apache.spark.sql.functions.collect_list
def collectSet[T](col: DoricColumn[T]): ArrayColumn[T]
Aggregate function: returns a set of objects with duplicate elements eliminated.
Aggregate function: returns a set of objects with duplicate elements eliminated.
Definition Classes
AggregationColumns
Note
The function is non-deterministic because the order of collected results depends on the order of the rows which may be non-deterministic after a shuffle.
See also
org.apache.spark.sql.functions.collect_set
def concat(cols: StringColumn*): StringColumn
Concatenate string columns to form a single one
Concatenate string columns to form a single one
cols
the String DoricColumns to concatenate
returns
a reference to a single DoricColumn with all strings concatenated. If at least one of the inputs is null, the result is null.
Definition Classes
StringColumns
See also
org.apache.spark.sql.functions.concat
def concatArrays[T, F[_]](cols: DoricColumn[F[T]]*)(implicit arg0: CollectionType[F]): DoricColumn[F[T]]
Concatenates multiple array columns together into a single column.
Concatenates multiple array columns together into a single column.
T
The type of the elements of the arrays.
cols
the array columns, must be Arrays of the same type.
returns
Doric Column with the concatenation.
Definition Classes
ArrayColumns
See also
org.apache.spark.sql.functions.concat
def concatBinary(col: BinaryColumn, cols: BinaryColumn*): BinaryColumn
Concatenates multiple binary columns together into a single column.
Concatenates multiple binary columns together into a single column.
col
the first binary column
cols
the binary columns
returns
Doric Column with the concatenation.
Definition Classes
BinaryColumns
See also
org.apache.spark.sql.functions.concat
def concatMaps[K, V](col: MapColumn[K, V], cols: MapColumn[K, V]*): MapColumn[K, V]
Returns the union of all the given maps.
Returns the union of all the given maps.
Definition Classes
MapColumns
See also
org.apache.spark.sql.functions.map_concat
def concatWs(sep: StringColumn, cols: StringColumn*): StringColumn
Concatenates multiple input string columns together into a single string column, using the given separator.
Concatenates multiple input string columns together into a single string column, using the given separator.
Definition Classes
StringColumns
Example:
1. df.withColumn("res", concatWs("-".lit, col("col1"), col("col2")))
     .show(false)
   +----+----+----+
   |col1|col2| res|
   +----+----+----+
   |   1|   1| 1-1|
   |null|   2|   2|
   |   3|null|   3|
   |null|null|    |
   +----+----+----+
Note
even if cols contain null columns, it prints remaining string columns (or empty string).
See also
org.apache.spark.sql.functions.concat_ws
def correlation(col1: DoubleColumn, col2: DoubleColumn): DoubleColumn
Aggregate function: returns the Pearson Correlation Coefficient for two columns.
Aggregate function: returns the Pearson Correlation Coefficient for two columns.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.corr
def count(colName: CName): LongColumn
Aggregate function: returns the number of items in a group.
Aggregate function: returns the number of items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.count
def count(col: DoricColumn[_]): LongColumn
Aggregate function: returns the number of items in a group.
Aggregate function: returns the number of items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.count
def countDistinct(columnName: CName, columnNames: CName*): LongColumn
Aggregate function: returns the number of distinct items in a group.
Aggregate function: returns the number of distinct items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.countDistinct
def countDistinct(expr: DoricColumn[_], exprs: DoricColumn[_]*): LongColumn
Aggregate function: returns the number of distinct items in a group.
Aggregate function: returns the number of distinct items in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.countDistinct
def covarPop(col1: DoubleColumn, col2: DoubleColumn): DoubleColumn
Aggregate function: returns the population covariance for two columns.
Aggregate function: returns the population covariance for two columns.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.covar_pop
def covarSamp(col1: DoubleColumn, col2: DoubleColumn): DoubleColumn
Aggregate function: returns the sample covariance for two columns.
Aggregate function: returns the sample covariance for two columns.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.covar_samp
def currentDate(): DateColumn
Returns the current date at the start of query evaluation as a date column.
Returns the current date at the start of query evaluation as a date column. All calls of current_date within the same query return the same value.
Definition Classes
DateColumns
See also
org.apache.spark.sql.functions.current_date
def currentDateT[T]()(implicit arg0: DateType[T], arg1: SparkType[T]): DoricColumn[T]
Returns the current date at the start of query evaluation as a date column typed with the provided T.
Returns the current date at the start of query evaluation as a date column typed with the provided T. All calls of current_date within the same query return the same value.
Definition Classes
DateColumns
See also
org.apache.spark.sql.functions.current_date
def currentTimestamp(): TimestampColumn
Returns the current timestamp at the start of query evaluation as a timestamp column.
Returns the current timestamp at the start of query evaluation as a timestamp column. All calls of current_timestamp within the same query return the same value.
Definition Classes
TimestampColumns
See also
org.apache.spark.sql.functions.current_timestamp
def currentTimestampT[T]()(implicit arg0: TimestampType[T], arg1: SparkType[T]): DoricColumn[T]
Returns the current timestamp at the start of query evaluation as a timestamp column typed with the provided T.
Returns the current timestamp at the start of query evaluation as a timestamp column typed with the provided T. All calls of current_timestamp within the same query return the same value.
Definition Classes
TimestampColumns
See also
org.apache.spark.sql.functions.current_timestamp
def customAgg[T, A, E](column: DoricColumn[T], initial: DoricColumn[A], update: (DoricColumn[A], DoricColumn[T]) => DoricColumn[A], merge: (DoricColumn[A], DoricColumn[A]) => DoricColumn[A], evaluate: (DoricColumn[A]) => DoricColumn[E])(implicit arg0: SparkType[A]): DoricColumn[E]
Definition Classes
AggregationColumns32
def first[T](col: DoricColumn[T], ignoreNulls: Boolean): DoricColumn[T]
Aggregate function: returns the first value in a group.
Aggregate function: returns the first value in a group.
The function by default returns the first values it sees. It will return the first non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned.
Definition Classes
AggregationColumns
Note
The function is non-deterministic because its result depends on the order of the rows, which may be non-deterministic after a shuffle.
See also
org.apache.spark.sql.functions.first
def first[T](col: DoricColumn[T]): DoricColumn[T]
Aggregate function: returns the first value in a group.
Aggregate function: returns the first value in a group.
The function by default returns the first values it sees. It will return the first non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned.
Definition Classes
AggregationColumns
Note
The function is non-deterministic because its result depends on the order of the rows, which may be non-deterministic after a shuffle.
See also
org.apache.spark.sql.functions.first
def formatString(format: StringColumn, arguments: DoricColumn[_]*): StringColumn
Formats the arguments in printf-style and returns the result as a string column.
Formats the arguments in printf-style and returns the result as a string column.
format
Printf format
arguments
the DoricColumns (of any type) to use as arguments of the format
returns
Formats the arguments in printf-style and returns the result as a string column.
Definition Classes
StringColumns
See also
org.apache.spark.sql.functions.format_string
def greatest[T](col: DoricColumn[T], cols: DoricColumn[T]*): DoricColumn[T]
Returns the greatest value of the list of values, skipping null values.
Returns the greatest value of the list of values, skipping null values. This function takes at least 2 parameters. It will return null iff all parameters are null.
Definition Classes
CommonColumns
Note
skips null values
See also
org.apache.spark.sql.functions.greatest
def grouping(columnName: CName): ByteColumn
Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated or not, returns 1 for aggregated or 0 for not aggregated in the result set.
Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated or not, returns 1 for aggregated or 0 for not aggregated in the result set.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.grouping
def grouping(col: DoricColumn[_]): ByteColumn
Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated or not, returns 1 for aggregated or 0 for not aggregated in the result set.
Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated or not, returns 1 for aggregated or 0 for not aggregated in the result set.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.grouping
def groupingId(colName: CName, colNames: CName*): LongColumn
Aggregate function: returns the level of grouping, equal to the expression shown in the example below.
Aggregate function: returns the level of grouping, equal to the expression shown in the example below.
Definition Classes
AggregationColumns
Example:
1. (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)
Note
The list of columns should match with grouping columns exactly, or empty (means all the grouping columns).
See also
org.apache.spark.sql.functions.grouping_id
def groupingId(col: DoricColumn[_], cols: DoricColumn[_]*): LongColumn
Aggregate function: returns the level of grouping, equal to the expression shown in the example below.
Aggregate function: returns the level of grouping, equal to the expression shown in the example below.
Definition Classes
AggregationColumns
Example:
1. (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)
Note
The list of columns should match with grouping columns exactly, or empty (means all the grouping columns).
See also
org.apache.spark.sql.functions.grouping_id
def hash(cols: DoricColumn[_]*): IntegerColumn
Calculates the hash code of given columns, and returns the result as an integer column.
Calculates the hash code of given columns, and returns the result as an integer column.
Definition Classes
CommonColumns
See also
org.apache.spark.sql.functions.hash
def inputFileName(): StringColumn
Creates a string column for the file name of the current Spark task.
Creates a string column for the file name of the current Spark task.
Definition Classes
StringColumns
See also
org.apache.spark.sql.functions.input_file_name
def kurtosis(col: DoubleColumn): DoubleColumn
Aggregate function: returns the kurtosis of the values in a group.
Aggregate function: returns the kurtosis of the values in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.kurtosis
def last[T](col: DoricColumn[T], ignoreNulls: Boolean): DoricColumn[T]
Aggregate function: returns the last value in a group.
Aggregate function: returns the last value in a group.
The function by default returns the last values it sees. It will return the last non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned.
Definition Classes
AggregationColumns
Note
The function is non-deterministic because its result depends on the order of the rows, which may be non-deterministic after a shuffle.
See also
org.apache.spark.sql.functions.last
def last[T](col: DoricColumn[T]): DoricColumn[T]
Aggregate function: returns the last value in a group.
Aggregate function: returns the last value in a group.
The function by default returns the last values it sees. It will return the last non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned.
Definition Classes
AggregationColumns
Note
The function is non-deterministic because its result depends on the order of the rows, which may be non-deterministic after a shuffle.
See also
org.apache.spark.sql.functions.last
def least[T](col: DoricColumn[T], cols: DoricColumn[T]*): DoricColumn[T]
Returns the least value of the list of values, skipping null values.
Returns the least value of the list of values, skipping null values. This function takes at least 2 parameters. It will return null iff all parameters are null.
Definition Classes
CommonColumns
Note
skips null values
See also
org.apache.spark.sql.functions.least
def list[T](cols: DoricColumn[T]*): DoricColumn[List[T]]
Creates a new list column.
Creates a new list column. The input columns must all have the same data type.
Definition Classes
ArrayColumns
To do
scaladoc link (issue #135)
See also
org.apache.spark.sql.functions.array
def lit[L](litv: L)(implicit arg0: SparkType[L], arg1: LiteralSparkType[L], l: Location): LiteralDoricColumn[L]
Creates a literal with the provided value.
Creates a literal with the provided value.
L
The type of the literal.
litv
the element to create as a literal.
returns
A doric column that represents the literal value and has the same type as the value.
Definition Classes
LiteralConversions
def map[K, V](first: (DoricColumn[K], DoricColumn[V]), rest: (DoricColumn[K], DoricColumn[V])*): MapColumn[K, V]
Creates a new map column.
Creates a new map column. The input is formed by tuples of key and the corresponding value.
K
the type of the keys of the Map
V
the type of the values of the Map
first
a pair of key value DoricColumns
rest
the rest of pairs of key and corresponding Values.
returns
the DoricColumn of the corresponding Map type
Definition Classes
MapColumns
See also
org.apache.spark.sql.functions.map
def mapFromArrays[K, V](keys: DoricColumn[Array[K]], values: DoricColumn[Array[V]]): MapColumn[K, V]
Creates a new map column.
Creates a new map column. The array in the first column is used for keys. The array in the second column is used for values. All elements in the array for key should not be null.
K
the type of the Array elements of the keys.
V
the type of the Array elements of the value.
keys
the array to create the keys.
values
the array to create the values.
returns
a DoricColumn of type Map of the keys and values.
Definition Classes
MapColumns
See also
org.apache.spark.sql.functions.map_from_arrays
def matchToType[T](colName: String)(implicit arg0: SparkType[T]): EmptyTypeMatcher[T]
Definition Classes
TypeMatcher
def max[T](col: DoricColumn[T]): DoricColumn[T]
Aggregate function: returns the maximum value of the expression in a group.
Aggregate function: returns the maximum value of the expression in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.max
def mean[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: returns the average of the values in a group.
Aggregate function: returns the average of the values in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.mean
def min[T](col: DoricColumn[T]): DoricColumn[T]
Aggregate function: returns the minimum value of the expression in a group.
Aggregate function: returns the minimum value of the expression in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.min
lazy val minorScalaVersion: Int
def monotonicallyIncreasingId(): LongColumn
A column expression that generates monotonically increasing 64-bit integers.
A column expression that generates monotonically increasing 64-bit integers.
The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive. The current implementation puts the partition ID in the upper 31 bits, and the record number within each partition in the lower 33 bits. The assumption is that the data frame has less than 1 billion partitions, and each partition has less than 8 billion records.
Definition Classes
NumericColumns
Example:
1. consider a DataFrame with two partitions, each with 3 records. This expression would return the following IDs:
  0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
See also
org.apache.spark.sql.functions.monotonically_increasing_id
def not(col: BooleanColumn): BooleanColumn
Inversion of boolean expression, i.e. NOT.
Inversion of boolean expression, i.e. NOT.
Definition Classes
BooleanColumns
See also
org.apache.spark.sql.functions.not
def orAgg(col: BooleanColumn): BooleanColumn
Aggregate function: returns the OR value for a boolean column
Aggregate function: returns the OR value for a boolean column
Definition Classes
AggregationColumns
def percentileApprox[T](col: DoricColumn[T], percentage: Double, accuracy: Int)(implicit arg0: DoubleC[T]): DoricColumn[T]
Aggregate function: returns the approximate percentile of the numeric column col which is the smallest value in the ordered col values (sorted from least to greatest) such that no more than percentage of col values is less than the value or equal to that value.
Aggregate function: returns the approximate percentile of the numeric column col which is the smallest value in the ordered col values (sorted from least to greatest) such that no more than percentage of col values is less than the value or equal to that value.
percentage
must be between 0.0 and 1.0.
accuracy
controls approximation accuracy at the cost of memory. Higher value of accuracy yields better accuracy, 1.0/accuracy is the relative error of the approximation.
Definition Classes
AggregationColumns31
Note
Support NumericType, DateType and TimestampType since their internal types are all numeric, and can be easily cast to double for processing.
See also
org.apache.spark.sql.functions.percentile_approx
def percentileApprox[T](col: DoricColumn[T], percentage: Array[Double], accuracy: Int)(implicit arg0: DoubleC[T]): ArrayColumn[T]
Aggregate function: returns the approximate percentile of the numeric column col which is the smallest value in the ordered col values (sorted from least to greatest) such that no more than percentage of col values is less than the value or equal to that value.
Aggregate function: returns the approximate percentile of the numeric column col which is the smallest value in the ordered col values (sorted from least to greatest) such that no more than percentage of col values is less than the value or equal to that value.
percentage
each value must be between 0.0 and 1.0.
accuracy
controls approximation accuracy at the cost of memory. Higher value of accuracy yields better accuracy, 1.0/accuracy is the relative error of the approximation.
Definition Classes
AggregationColumns31
Note
Support NumericType, DateType and TimestampType since their internal types are all numeric, and can be easily cast to double for processing.
See also
org.apache.spark.sql.functions.percentile_approx
def raiseError(str: String)(implicit l: Location): NullColumn
Throws an exception with the provided error message.
Throws an exception with the provided error message.
Definition Classes
StringColumns31
Exceptions thrown
java.lang.RuntimeException with the error message
See also
org.apache.spark.sql.functions.raise_error
def random(seed: LongColumn): DoubleColumn
Generate a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0).
Generate a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0).
Definition Classes
NumericColumns
Note
The function is non-deterministic in general case.
See also
org.apache.spark.sql.functions.rand
def random(): DoubleColumn
Generate a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0).
Generate a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0).
Definition Classes
NumericColumns
Note
The function is non-deterministic in general case.
See also
org.apache.spark.sql.functions.rand
def randomN(seed: LongColumn): DoubleColumn
Generate a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution.
Generate a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution.
Definition Classes
NumericColumns
Note
The function is non-deterministic in general case.
See also
org.apache.spark.sql.functions.randn
def randomN(): DoubleColumn
Generate a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution.
Generate a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution.
Definition Classes
NumericColumns
Note
The function is non-deterministic in general case.
See also
org.apache.spark.sql.functions.randn
def skewness[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: returns the skewness of the values in a group.
Aggregate function: returns the skewness of the values in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.skewness
def sparkAgg(relationalGroupedDataset: RelationalGroupedDataset, expr: DoricColumn[_], exprs: DoricColumn[_]*): DoricValidated[DataFrame]
Definition Classes
RelationalGroupedDatasetDoricInterface
def sparkCube(df: DataFrame, cols: DoricColumn[_]*): DoricValidated[RelationalGroupedDataset]
Attributes
protected
Definition Classes
RelationalGroupedDatasetDoricInterface
def sparkGroupBy(df: DataFrame, cols: DoricColumn[_]*): DoricValidated[RelationalGroupedDataset]
Attributes
protected
Definition Classes
RelationalGroupedDatasetDoricInterface
def sparkPartitionId(): IntegerColumn
Partition ID.
Partition ID.
Definition Classes
NumericColumns
Note
This is non-deterministic because it depends on data partitioning and task scheduling.
See also
org.apache.spark.sql.functions.spark_partition_id
def sparkPivot[T](relationalGroupedDataset: RelationalGroupedDataset, expr: DoricColumn[T], values: Seq[T]): DoricValidated[RelationalGroupedDataset]
Definition Classes
RelationalGroupedDatasetDoricInterface
def sparkRollup(df: DataFrame, cols: DoricColumn[_]*): DoricValidated[RelationalGroupedDataset]
Attributes
protected
Definition Classes
RelationalGroupedDatasetDoricInterface
def sparkTaskName(): StringColumn
Creates a string column for the file name of the current Spark task.
Creates a string column for the file name of the current Spark task.
Definition Classes
StringColumns
Annotations
@inline()
See also
inputFileName
def stdDev[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: alias for stddev_samp.
Aggregate function: alias for stddev_samp.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.stddev
def stdDevPop[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: returns the population standard deviation of the expression in a group.
Aggregate function: returns the population standard deviation of the expression in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.stddev_pop
def stdDevSamp[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: returns the sample standard deviation of the expression in a group.
Aggregate function: returns the sample standard deviation of the expression in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.stddev_samp
def struct(cols: DoricColumn[_]*): RowColumn
Creates a struct with the columns
Creates a struct with the columns
cols
the columns that will form the struct
returns
A DStruct DoricColumn.
Definition Classes
DStructs
def sum[T](col: DoricColumn[T])(implicit nt: NumericType[T]): DoricColumn[Sum]
Aggregate function: returns the sum of all values in the expression.
Aggregate function: returns the sum of all values in the expression.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.sum
def sumDistinct[T](col: DoricColumn[T])(implicit nt: NumericType[T]): DoricColumn[Sum]
Aggregate function: returns the sum of distinct values in the expression.
Aggregate function: returns the sum of distinct values in the expression.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.sumDistinct
def unixTimestamp(): LongColumn
Returns the current Unix timestamp (in seconds) as a long.
Returns the current Unix timestamp (in seconds) as a long.
Definition Classes
NumericColumns
Note
All calls of unix_timestamp within the same query return the same value (i.e. the current timestamp is calculated at the start of query evaluation).
See also
org.apache.spark.sql.functions.unix_timestamp
def varPop[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: returns the population variance of the values in a group.
Aggregate function: returns the population variance of the values in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.var_pop
def varSamp[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: returns the unbiased variance of the values in a group.
Aggregate function: returns the unbiased variance of the values in a group.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.var_samp
def variance[T](col: DoricColumn[T])(implicit arg0: NumericType[T]): DoubleColumn
Aggregate function: alias for var_samp.
Aggregate function: alias for var_samp.
Definition Classes
AggregationColumns
See also
org.apache.spark.sql.functions.variance
def when[T]: WhenBuilder[T]
Initialize a when builder
Initialize a when builder
T
the type of the returning DoricColumn
returns
WhenBuilder instance to add the required logic.
Definition Classes
ControlStructures
def xxhash64(cols: DoricColumn[_]*): LongColumn
Calculates the hash code of given columns using the 64-bit variant of the xxHash algorithm, and returns the result as a long column.
Calculates the hash code of given columns using the 64-bit variant of the xxHash algorithm, and returns the result as a long column.
Definition Classes
CommonColumns3x
See also
org.apache.spark.sql.functions.xxhash64
object Asc extends Order
object AscNullsFirst extends Order
object AscNullsLast extends Order
object CName extends Serializable
object CNameOrd extends Serializable
object Desc extends Order
object DescNullsFirst extends Order
object DescNullsLast extends Order
object Doric
object DoricColumn extends ColGetters[NamedDoricColumn]
object LeftDF extends ColGetters[LeftDoricColumn]
object LiteralDoricColumn extends Serializable
object NamedDoricColumn extends Serializable
object RightDF extends ColGetters[RightDoricColumn]
object row extends Dynamic
The object row stands for the top-level row of the DataFrame.
The object row stands for the top-level row of the DataFrame.
Definition Classes
ColGetters
object SelectorWithSparkType extends SelectorLPI
Definition Classes
DStructs

Packages

doric

package doric

Package Members

Type Members

Abstract Value Members

Concrete Value Members

Inherited from All

Inherited from SortingOps

Inherited from CollectOps

Inherited from JoinOps

Inherited from TransformOps

Inherited from AggregationOps

Inherited from RelationalGroupedDatasetDoricInterface

Inherited from All

Inherited from DStructs3x

Inherited from AggregationColumns32

Inherited from StringColumn3x

Inherited from MapColumns3x

Inherited from CommonColumns3x

Inherited from ArrayColumns3x

Inherited from BinaryColumns32

Inherited from StringColumns31

Inherited from NumericColumns32

Inherited from NumericColumns31

Inherited from BooleanColumns31

Inherited from AggregationColumns31

Inherited from Interpolators

Inherited from BinaryColumns

Inherited from doric.syntax.CNameOps

Inherited from AggregationColumns

Inherited from ControlStructures

Inherited from StringColumns

Inherited from BooleanColumns

Inherited from TimestampColumns

Inherited from DateColumns

Inherited from NumericColumns

Inherited from MapColumns

Inherited from LiteralConversions

Inherited from DStructs

Inherited from CommonColumns

Inherited from ColGetters[NamedDoricColumn]

Inherited from TypeMatcher

Inherited from ArrayColumns

Inherited from AnyRef

Inherited from Any

Aggregation Any Type

Aggregation Boolean Type

Aggregation Double Type

Aggregation DoubleC Type

Aggregation Numeric Type

All Types

Array Type

Binary Type

Boolean Type

Control structure

Date Type

Map Type

Numeric Type

String Type

Struct Type

Timestamp Type

Ungrouped

doric