final def !=(arg0: Any): Boolean

Definition Classes: AnyRef → Any

final def ##(): Int

Definition Classes: AnyRef → Any

final def ==(arg0: Any): Boolean

Definition Classes: AnyRef → Any

def YJJJ_to_YYYYJJJ(in_date: Column, ref_date: Column): Column

Converts 1 digit julian year to 4 digits julian year.

in_date: date in Julian in "YJJJ" format
ref_date: date in "yyyyMMdd" format
returns: a date in "YYYYJJJ"

Annotations: @Py4JWhitelist()

lazy val adjustCenturyDateInCyyFormat: UserDefinedFunction

Beginning of input should have Cyy

def adjustStringRegexPattern(input: String): String

def alteryxFlattenSchema(dataFrame: DataFrame, jsonParsedColumnName: String, sparkSession: SparkSession): DataFrame

Annotations: @Py4JWhitelist()

final def asInstanceOf[T0]: T0

Definition Classes: Any

lazy val bigDecimalToPackedBytes: UserDefinedFunction

lazy val bv_all_zeros: UserDefinedFunction

lazy val bv_and: UserDefinedFunction

lazy val bv_count_one_bits: UserDefinedFunction

lazy val bv_difference: UserDefinedFunction

lazy val bv_from_index_vector: UserDefinedFunction

lazy val bv_indices: UserDefinedFunction

lazy val bv_or: UserDefinedFunction

lazy val bv_vector_or: UserDefinedFunction

lazy val canonical_representation: UserDefinedFunction

lazy val char_string: UserDefinedFunction

def clone(): AnyRef

Attributes: protected[lang]
Definition Classes: AnyRef
Annotations: @throws( ... ) @native() @HotSpotIntrinsicCandidate()

def computeChecksum(df: DataFrame): DataFrame

Annotations: @Py4JWhitelist()

def convertInputBytesToStructType(input: Any, typeInfo: Seq[String], startByte: Int = 0): Row

Method used for abinitio's reinterpret_as function to read necessary bytes from byteArray for input data and convert into struct format as per provided in typeInfo sequence.

TypeInfo can have multiple entries, each could be either decimal or string type. Depending on the argument passed within decimal or string bytes are read from input byte array.

If decimal or string argument has some integer then that many bytes are read from input byte array or if decimal or string has some string delimiter as its argument then from the current position bytes are read until string delimiter is found in input byte array.

def createDataFrameFromData(inputData: String, delimiter: String, columnName: String, columnType: String, sparkSession: SparkSession): DataFrame

Method to read values from inputData and create dataframe with column name as columnName and column type as columnType for the values in inputData delimited by delimiter.

Annotations: @Py4JWhitelist()

lazy val cross_join_index_range: UserDefinedFunction

def date_add_months(inputDate: Column, months: Int): Column

Returns the internal representation of a date resulting from adding (or subtracting) a number of months to the specified date.

inputDate: in yyyy-MM-dd format

Annotations: @Py4JWhitelist()

def date_difference_days(laterDate: Column, earlierDate: Column): Column

Computes number of days between two specified dates in "yyyyMMdd" format

laterDate: input date
earlierDate: input date
returns: number of days between laterDate and earlierDate or null if either one is null

Annotations: @Py4JWhitelist()

lazy val date_month_end: UserDefinedFunction

def date_to_int(input: Column): Column

Annotations: @Py4JWhitelist()

lazy val datetime_add: UserDefinedFunction

def datetime_add_months(input: Column, months: Int): Column

Returns the internal representation of a timestamp resulting from adding (or subtracting) a number of months to the specified timestamp.

input: timestamp in yyyy-MM-dd HH:mm:ss.SSSS format

Annotations: @Py4JWhitelist()

lazy val datetime_difference: UserDefinedFunction

def datetime_difference_hours(end: Column, start: Column): Column

Returns the number of hours between two specified dates in standard format yyyy-MM-dd HH:mm:ss.SSSS.

Annotations: @Py4JWhitelist()

def datetime_difference_minutes(end: Column, start: Column): Column

Returns the number of minutes between two specified dates in standard format yyyy-MM-dd HH:mm:ss.SSSS.

Annotations: @Py4JWhitelist()

def datetime_difference_seconds(end: Column, start: Column): Column

Returns the number of seconds between two specified dates in standard format yyyy-MM-dd HH:mm:ss.SSSS.

Annotations: @Py4JWhitelist()

def datetime_from_unixtime(seconds: Column): Column

Annotations: @Py4JWhitelist()

def decimal_lpad(input: Column, len: Int, char_to_pad_with: String = "0", decimal_point_char: String = "."): Column

Method uses a java regex to identify decimal numbers from input string.

Method uses a java regex to identify decimal numbers from input string. This decimal number could be of 3 types 1. Simple integral number. e.g. 013334848. This part is identified by regex. 2. decimal number with explicit decimal point. e.g. 123456.90. This part is identified by combination of [0-9]+(\$$decimal_point_char)[0-9]+ and (0\$$decimal_point_char)[0-9]+ regex

After extracting decimal number this code checks if length of decimal number is more than len parameter or not. If length is more than len parameter then it simply returns this extracted decimal number. Otherwise it first left pad decimal number with char_to_pad_with to make its length equal to len parameter and then adjusts minus sign (-) to left most part of decimal number.

input: input string.
len: length of characters.
char_to_pad_with: character to left pad with. default value is "0"
decimal_point_char: A string that specifies the character that represents the decimal point.
returns: a decimal string of the specified length or longer, left-padded with a specified character as needed and trimmed of leading zeros.

Annotations: @Py4JWhitelist()

def decimal_lrepad(input: Column, len: Int, char_to_pad_with: String = "0", decimal_point_char: String = "."): Column

Method uses a java regex to identify decimal numbers from input string.

Method uses a java regex to identify decimal numbers from input string. This decimal number could be of 3 types 1. Simple integral number. e.g. 013334848. This part is identified by combination of [1-9][0-9]*[0-9] and [1-9]+ regex 2. decimal number with explicit decimal point. e.g. 123456.90. This part is identified by combination of [1-9][0-9]*(\\\$$decimal_point_char)[0-9]+ and (0\\\$$decimal_point_char)[0-9]*[0-9] regex

After extracting decimal number this code checks if length of decimal number is more than len parameter or not. If length is more than len parameter then it simply returns this extracted decimal number. Otherwise it first left pad decimal number with char_to_pad_with to make its length equal to len parameter and then adjusts minus sign (-) to left most part of decimal number.

input: input string.
len: length of characters.
char_to_pad_with: character to left pad with. default value is "0"
decimal_point_char: A string that specifies the character that represents the decimal point.
returns: a decimal string of the specified length or longer, left-padded with a specified character as needed and trimmed of leading zeros.

Annotations: @Py4JWhitelist()

def decimal_round(input: Column, places: Int): Column

Annotations: @Py4JWhitelist()

def decimal_round_down(input: Column, right_digits: Int): Column

Function returns a value which is rounded down to right_digits number of digits to the right of decimal point.

Annotations: @Py4JWhitelist()

def decimal_round_even(input: Column, places: Int): Column

Annotations: @Py4JWhitelist()

def decimal_round_up(input: Column, places: Int): Column

Returns a number rounded up to a specified number of places to the right of the decimal point.

Annotations: @Py4JWhitelist()

def decimal_strip(input: Column, decimal_point_char: String = "."): Column

Function uses a java regex to identify decimal numbers from input string.

Function uses a java regex to identify decimal numbers from input string. This decimal number could be of 3 types 1. Simple integral number. e.g. 013334848. This part is identified by combination of [1-9][0-9 ]*[0-9] and [1-9]+ regex 2. decimal number with explicit decimal point. e.g. 123456.90. This part is identified by combination of [1-9][0-9]*(\$$decimal_point_char)[0-9 ]+ and (0\$$decimal_point_char)[0-9 ]*[0-9] regex

After extracting decimal number this code looks for minus sign before extracted number in input and appends it with decimal number if found minus sign.

In the end it replaces all whitespaces with empty string in the final resultant decimal number.

input: input string
decimal_point_char: A string that specifies the character that represents the decimal point.
returns: a decimal from a string that has been trimmed of leading zeros and non-numeric characters.

Annotations: @Py4JWhitelist()

def decimal_truncate(input: Column, number_of_places: Column): Column

Annotations: @Py4JWhitelist()

lazy val decodeBytes: UserDefinedFunction

lazy val decodeString: UserDefinedFunction

lazy val decode_datetime: UserDefinedFunction

UDF to get record of type decode_datetime_type.

UDF to get record of type decode_datetime_type. This record will have all its fields populated with corresponding entries in input date/timestamp.

Returned record will have following schema.

integer(8) year; integer(8) month; integer(8) day; integer(8) hour; integer(8) minute; integer(8) second; integer(8) microsecond;

Note: Supported Input time is in yyyy-MM-dd HH:mm:ss.SSSSSS or yyyy-MM-dd HH:mm:ss or yyyy-MM-dd formats only. Additional handling is done to support timestamp retrieved from now() function call.

lazy val decode_datetime_as_local: UserDefinedFunction

lazy val directory_listing: UserDefinedFunction

lazy val directory_listing_dir_only: UserDefinedFunction

def directory_listing_scala(path: String, filePrefix: String): Column

Annotations: @Py4JWhitelist()

lazy val encodeBytes: UserDefinedFunction

lazy val encodeString: UserDefinedFunction

lazy val encode_date: UserDefinedFunction

integer values specifying days relative to January 1, 1900.

integer values specifying days relative to January 1, 1900. This function returns the internal representation of a date given the year, month, and date. encode_date returns the internal representation of the date specified by the year 1998, the month 5, and the day 18:encode_date(1998, 5, 18) = 35931

def ends_with(input: Column, suffix: String): Column

Returns true if string column ends with given suffix

Annotations: @Py4JWhitelist()

final def eq(arg0: AnyRef): Boolean

Definition Classes: AnyRef

def equals(arg0: Any): Boolean

Definition Classes: AnyRef → Any

lazy val eval: UserDefinedFunction

Method to return the result of evaluating a string expression in the context of a specified input column.

Method to return the result of evaluating a string expression in the context of a specified input column. Here input column could be struct type record, simple column, array type etc. Here expr could be reference to nested column inside input column or any expression which requires values from input column for its evaluation.

Note: Current implementation only supports scenario where input column is of struct type and expr is simply dot separated column reference to input struct.

lazy val file_information: UserDefinedFunction

UDF to get file information for passed input file path.

def findFirstElement(input: Column, default: Column = lit(null)): Column

Annotations: @Py4JWhitelist()

def findFirstNonBlankElement(input: Column, default: Column): Column

Annotations: @Py4JWhitelist()

def findLastElement(input: Column, default: Column = lit(null)): Column

Annotations: @Py4JWhitelist()

def first_defined(expr1: Column, expr2: Column): Column

Method to identify and return first non null expression.

Annotations: @Py4JWhitelist()

lazy val first_defined_for_double_Udf: UserDefinedFunction

def flattenStructSchema(schema: StructType, prefix: String = null): Array[Column]

Annotations: @Py4JWhitelist()

lazy val force_error: UserDefinedFunction

def format_decimal(input: Column, scale: Int): Column

Annotations: @Py4JWhitelist()

def from_sv(input: Column, separator: String, schema: StructType): Column

Annotations: @Py4JWhitelist()

def from_xml(content: Column, schema: StructType): Column

Annotations: @Py4JWhitelist()

def from_xml(content: Column, schemaJSON: String): Column

Annotations: @Py4JWhitelist()

def generateDataFrameWithSequenceColumn(start: Int, end: Int, columnName: String, sparkSession: SparkSession): DataFrame

Method to create dataframe with single column containing increasing sequence id from start to end.

Annotations: @Py4JWhitelist()

def generate_sequence(start: Int, end: Int, step: Int = 1): Column

Function to create an array containing a sequence of integers between two passed numbers

start: starting point of generated sequence
end: terminating point of generated sequence.
returns: column containing sequence of integers.

Annotations: @Py4JWhitelist()

lazy val generate_sequence: UserDefinedFunction

UDF to generate column with sequence of integers between two passed start and end columns.

def getAlias(column: Column): String

lazy val getByteFromByteArray: UserDefinedFunction

UDF to get last Byte from ByteArray of input data.

final def getClass(): Class[_]

Definition Classes: AnyRef → Any
Annotations: @native() @HotSpotIntrinsicCandidate()

def getColumnInSecondArrayByFirstNonBlankPositionInFirstArray(nonBlankEntryExpr: Column, firstArray: Column, secondArray: Column): Column

Annotations: @Py4JWhitelist()

def getContentAsStream(content: String): StringAsStream

lazy val getDefaultedElseTrimmed: UserDefinedFunction

def getFebruaryDay(year: Column): Column

Computes number of days in February month in a given year

year: year whose number of days in February needs to be calculated
returns: number of days

Annotations: @Py4JWhitelist()

def getFieldFromStructByPosition(column: Column, position: Int): Column

Method to get field at specific position from struct column

Annotations: @Py4JWhitelist()

lazy val getIntArrayFromByteArray: UserDefinedFunction

UDF to get long comprising of last 8 Bytes from ByteArray of input data.

lazy val getIntFromByteArray: UserDefinedFunction

UDF to get integer comprising of last 4 Bytes from ByteArray of input data.

lazy val getLongArrayFromByteArray: UserDefinedFunction

UDF to get long comprising of last 8 Bytes from ByteArray of input data.

lazy val getLongFromByteArray: UserDefinedFunction

UDF to get long comprising of last 8 Bytes from ByteArray of input data.

def getMTimeDataframe(filepath: String, format: String, spark: SparkSession): DataFrame

Annotations: @Py4JWhitelist()

lazy val getShortFromByteArray: UserDefinedFunction

UDF to get short comprising of last 2 Bytes from ByteArray of input data.

def hashCode(): Int

Definition Classes: AnyRef → Any
Annotations: @native() @HotSpotIntrinsicCandidate()

lazy val hash_MD5: UserDefinedFunction

lazy val hash_SHA1: UserDefinedFunction

def hash_SHA512(input: Column): Column

Annotations: @Py4JWhitelist()

def hash_value(input: Column, keys: Seq[String], hashAlgorithm: String): Column

Annotations: @Py4JWhitelist()

lazy val instr_extended_udf: UserDefinedFunction

lazy val instr_udf: UserDefinedFunction

final def isInstanceOf[T0]: Boolean

Definition Classes: Any

lazy val isNotEqualToValue: UserDefinedFunction

lazy val isNotNullAndNotBlank: UserDefinedFunction

lazy val isNullOrBlank: UserDefinedFunction

def isNullOrEmpty(input: Column): Column

Method to check if current column is null or has empty value.

Annotations: @Py4JWhitelist()

def is_ascii(input: Column): Column

Checks if a string is ascii

input: column to be checked
returns: true if the input string is ascii otherwise false

Annotations: @Py4JWhitelist()

def is_blank(input: Column): Column

Method to identify if input string is a blank string or not.

input: input string.
returns: return 1 if given string contains all blank character or is a zero length string, otherwise it returns 0

Annotations: @Py4JWhitelist()

lazy val is_blank_udf: UserDefinedFunction

lazy val is_bzero: UserDefinedFunction

Tests whether an object is composed of all binary zero bytes.

Tests whether an object is composed of all binary zero bytes. This function returns: 1. 1 if obj contains only binary zero bytes or is a zero-length string 2. 0 if obj contains any non-zero bytes 3. NULL if obj is NULL

def is_not_blank(input: Column): Column

Annotations: @Py4JWhitelist()

def is_not_null(input: Column): Column

Annotations: @Py4JWhitelist()

def is_numeric_ascii(input: Column): Column

Checks if an input string contains only ascii code and numbers

input: string to be checked
returns: true if input string contains only ascii code and numbers or null if input is null

Annotations: @Py4JWhitelist()

def is_valid(input: Column, isNullable: Boolean, formatInfo: Option[Any], len: Option[Seq[Int]]): Column

Method to identify if passed input column is a valid expression after typecasting to passed dataType.

Method to identify if passed input column is a valid expression after typecasting to passed dataType. Also while typecasting if len is present then this function also makes sure the max length of input column after typecasting operation is not greater than len.

input: input column expression to be identified if is valid.
formatInfo: datatype to which input column expression must be typecasted. If datatype is a string then it is treated as timestamp format. If it is a list of strings then it is treated as having current timestamp format and new timestamp format to which input column needs to be typecasted.
len: max length of input column after typecasting it to dataType.
returns: 0 if input column is not valid after typecasting or 1 if it is valid.

Annotations: @Py4JWhitelist()

def is_valid(input: Column, isNullable: Boolean, formatInfo: Option[Any]): Column

Annotations: @Py4JWhitelist()

def is_valid(input: Column, formatInfo: Option[Any], len: Option[Seq[Int]]): Column

Annotations: @Py4JWhitelist()

def is_valid(input: Column, formatInfo: Option[Any]): Column

Annotations: @Py4JWhitelist()

def is_valid(input: Column, isNullable: Boolean): Column

Annotations: @Py4JWhitelist()

def is_valid(input: Column): Column

Annotations: @Py4JWhitelist()

def is_valid_date(dateFormat: String, inDate: Column): Column

Validates date against a input format

dateFormat: A pattern such as yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.SSSS or dd.MM.yyyy
inDate: Input date to be validated
returns: true if the input date is valid otherwise false

Annotations: @Py4JWhitelist()

def is_valid_python_bridge(input: Column, isNullable: Boolean, formatSerialized: Option[Any], len: Option[Seq[Int]]): Column

Annotations: @Py4JWhitelist()

def lastElementInCurrentWindow(input: Column): Column

Annotations: @Py4JWhitelist()

lazy val make_byte_flags: UserDefinedFunction

UDF to return a flag for each character if it is present or not in input String.

def make_constant_vector(size: Int, seedVal: Int): Array[Int]

Method to create array of size "size" containing seedVal as each entry

Annotations: @Py4JWhitelist()

def make_constant_vector(size: Int, seedVal: Column): Column

Method to create array of size "size" containing seedVal as each entry

Annotations: @Py4JWhitelist()

def math_max(input: Column*): Column

Annotations: @Py4JWhitelist()

def math_min(input: Column*): Column

Annotations: @Py4JWhitelist()

lazy val multi_regex_match: UserDefinedFunction

def multi_regex_replace_with_char_conversion(input: Column, charSet: Column, replaceStr: Column, replacement0: String, replacement1: String, pattern: String*): Column

Annotations: @Py4JWhitelist()

lazy val multifile_information: UserDefinedFunction

UDF to get multifile information for passed input file path.

lazy val murmur: UserDefinedFunction

UDF for murmur hash generation for any column type

final def ne(arg0: AnyRef): Boolean

Definition Classes: AnyRef

final def notify(): Unit

Definition Classes: AnyRef
Annotations: @native() @HotSpotIntrinsicCandidate()

final def notifyAll(): Unit

Definition Classes: AnyRef
Annotations: @native() @HotSpotIntrinsicCandidate()

def now(): Column

Method to get current timestamp.

returns: current timestamp in YYYYMMddHHmmssSSSSSS format.

Annotations: @Py4JWhitelist()

def numberOfPartitions(in: DataFrame): Column

Annotations: @Py4JWhitelist()

lazy val number_grouping: UserDefinedFunction

udf to group input decimal into multiple groups separated by separator

lazy val packedBytesStringToDecimal: UserDefinedFunction

lazy val packedBytesToDecimal: UserDefinedFunction

lazy val raw_data_concat: UserDefinedFunction

lazy val raw_data_substring: UserDefinedFunction

lazy val re_get_match: UserDefinedFunction

Returns the first string in a target string that matches a regular expression.

lazy val re_get_match_with_index: UserDefinedFunction

lazy val re_get_matches: UserDefinedFunction

lazy val re_get_matches_with_offset: UserDefinedFunction

lazy val re_index: UserDefinedFunction

UDF wrapper over re_index function.

lazy val re_index_with_offset: UserDefinedFunction

Returns the first string in a target string that matches a regular expression.

lazy val re_match_replace_all: UserDefinedFunction

def re_replace(target: Column, pattern: String, replacement: String, offset: Int = 0): Column

Replaces all substrings in a target string that match a specified regular expression.

target: A string that the function searches for a substring that matches pattern_expr.
pattern: regular expression
replacement: replacement string
offset: Number of characters, from the beginning of str, to skip before searching.
returns: a replaced string in which all substrings, which matches a specified regular expression, are replaced.

Annotations: @Py4JWhitelist()

def re_replace_first(target: Column, pattern: String, replacement: String, offset: Column = lit(0)): Column

Replaces only the first regex matching occurrence in the target string.

target: A string that the function searches for a substring that matches pattern_expr.
pattern: regular expression
replacement: replacement string
returns: a replaced string in which first substring, which matches a specified regular expression, is replaced.

Annotations: @Py4JWhitelist()

lazy val re_split_no_empty: UserDefinedFunction

UDF to split input string via pattern string and remove all empty substrings.

lazy val readBytesIntoInteger: UserDefinedFunction

lazy val readBytesIntoLong: UserDefinedFunction

lazy val readBytesStringIntoInteger: UserDefinedFunction

lazy val readBytesStringIntoLong: UserDefinedFunction

lazy val read_file: UserDefinedFunction

lazy val record_info: UserDefinedFunction

lazy val record_info_with_includes: UserDefinedFunction

def registerAllUDFs(spark: SparkSession): Unit

Annotations: @Py4JWhitelist()

def remove_non_digit(input: Column): Column

Method removes any non-digit characters from the specified string column.

input: input String Column
returns: Cleaned string column or null

Annotations: @Py4JWhitelist()

def replaceBlankColumnWithNull(input: Column): Column

Method to replace String Columns with Empty value to Null.

Annotations: @Py4JWhitelist()

def replaceNullWithDefaultValues(input: Column, schema: String): Column

Annotations: @Py4JWhitelist()

def replace_null_with_blank(input: Column): Column

Annotations: @Py4JWhitelist()

def scanf_double(format: Column, value: Column): Column

Annotations: @Py4JWhitelist()

def scanf_long(format: Column, value: Column): Column

Annotations: @Py4JWhitelist()

def schemaRowCompareResult(row1: StructType, row2: StructType): Column

Annotations: @Py4JWhitelist()

def sign_explicit(c: Column): Column

Adds an explicit sign to the number.

Adds an explicit sign to the number. E.g. 2 -> +2; -004 -> -004; 0 -> +0

Annotations: @Py4JWhitelist()

lazy val sign_explicit_Udf: UserDefinedFunction

def sign_reserved(c: Column): Column

Annotations: @Py4JWhitelist()

lazy val sign_reserved_Udf: UserDefinedFunction

lazy val splitIntoMultipleColumnsUdf: UserDefinedFunction

UDF to break input string into multiple string via delimiter.

UDF to break input string into multiple string via delimiter. Number of strings after split are adjusted as per passed width parameter. If number of strings are less then empty strings are added otherwise in case of more number of strings, first width number of entries are picked and remaining are discarded.

lazy val splitIntoNormalizedJSON: UserDefinedFunction

def starts_with(input: Column, prefix: String): Column

Returns true if string column starts with given prefix

Annotations: @Py4JWhitelist()

def string_char(inputStr: Column, index: Int): Column

Method to return character code of character at index position in inputStr string.

inputStr: input string
index: location of character to get code.
returns: integer column.

Annotations: @Py4JWhitelist()

lazy val string_cleanse: UserDefinedFunction

This implementation is incorrect.

def string_compare(input1: Column, input2: Column): Column

Annotations: @Py4JWhitelist()

lazy val string_concat_in_loop: UserDefinedFunction

lazy val string_convert_explicit: UserDefinedFunction

Converts a string from one character set to another, replacing inconvertible characters with a specified string.

lazy val string_filter: UserDefinedFunction

Method which returns string of characters present in both of the strings in the same order as appearing in first string

lazy val string_filter_out: UserDefinedFunction

Compares two input strings, then returns characters that appear in one string but not in the other.

lazy val string_from_hex: UserDefinedFunction

lazy val string_index: UserDefinedFunction

UDF to find index of seekStr in inputStr.

UDF to find index of seekStr in inputStr. Returned index will be 1 based index.

lazy val string_index_with_offset: UserDefinedFunction

UDF to find index of seekStr in inputStr from offset index onwards.

UDF to find index of seekStr in inputStr from offset index onwards. Returned string position is 1 based position.

def string_is_alphabetic(input: Column): Column

Method which returns true if input string contains all alphabetic characters, or false otherwise.

Annotations: @Py4JWhitelist()

def string_is_numeric(input: Column): Column

Method which returns true if input string contains all numeric characters, or false otherwise.

Annotations: @Py4JWhitelist()

def string_join(column: Column, delimiter: String): Column

Concatenates the elements of column using the delimiter.

Annotations: @Py4JWhitelist()

def string_length(input: Column): Column

Annotations: @Py4JWhitelist()

lazy val string_like: UserDefinedFunction

Method to test whether a string matches a specified pattern.

Method to test whether a string matches a specified pattern. This function returns 1 if the input string matches a specified pattern, and 0 if the string does not match the pattern.

In abinitio version % character in pattern means to match zero or more characters and _ character means matches a single character.

def string_lpad(input: Column, len: Int, pad_char: String = " "): Column

Left-pad the input string column with pad_char to a length of len.

Left-pad the input string column with pad_char to a length of len. If length of input column is more than len then returns input column unmodified.

Annotations: @Py4JWhitelist()

def string_lrepad(input: Column, len: Int, char_to_pad_with: String = " "): Column

Function trims the string and then pads the string with given character up to given length.

Function trims the string and then pads the string with given character up to given length. If the length of trimmed string is equal to or greater than given length then it returns input string.

input: input string
len: length in number of characters.
char_to_pad_with: A character used to pad input string to length len.
returns: string of a specified length, trimmed of leading and trailing blanks and left-padded with a given character.

Annotations: @Py4JWhitelist()

def string_pad(input: Column, len: Int, char_to_pad_with: String = " "): Column

function pads input on the right with the character char_to_pad_with to make the string length len.

function pads input on the right with the character char_to_pad_with to make the string length len. If str is already len or more characters long, the function returns input unmodified.

Annotations: @Py4JWhitelist()

lazy val string_pad: UserDefinedFunction

lazy val string_pad_with_char: UserDefinedFunction

def string_prefix(input: Column, length: Column): Column

Annotations: @Py4JWhitelist()

def string_repad(input: Column, len: Int, char_to_pad_with: String = " "): Column

Function trims the string and then pads the string on right side with given character up to given length.

Function trims the string and then pads the string on right side with given character up to given length. If the length of trimmed string is equal to or greater than given length then it returns input string.

input: input string
len: length in number of characters.
char_to_pad_with: A character used to pad input string to length len.
returns: string of a specified length, trimmed of leading and trailing blanks and right-padded with a given character.

Annotations: @Py4JWhitelist()

def string_replace(input: Column, seekStr: Column, newStr: Column, offset: Column = lit(0)): Column

Function to replace occurrence of seekStr with newStr string in input string after offset characters from first character.

input: input string on which to perform replace operation.
seekStr: string to be replaced in input string.
newStr: string to be used instead of seekStr in input string.
offset: number of characters to skip from beginning in input string before performing string_replace operation.
returns: modified string where seekStr is replaced with newStr in input string.

Annotations: @Py4JWhitelist()

lazy val string_replace_first: UserDefinedFunction

lazy val string_replace_first_in_loop: UserDefinedFunction

lazy val string_replace_in_loop: UserDefinedFunction

lazy val string_representation: UserDefinedFunction

lazy val string_rindex: UserDefinedFunction

Returns the index of the first character of the last occurrence of a seek string within another input string.

Returns the index of the first character of the last occurrence of a seek string within another input string. Returned index is 1 based.

lazy val string_rindex_with_offset: UserDefinedFunction

UDF to find index of seekStr in inputStr from end of inputStr skipping offset number of characters from end.

UDF to find index of seekStr in inputStr from end of inputStr skipping offset number of characters from end. Offset index is number of characters, from the end of str, to skip before searching. Returned string position is 1 based position.

lazy val string_run_length_split: UserDefinedFunction

lazy val string_split: UserDefinedFunction

UDF to split input string via delimiter string.

lazy val string_split_no_empty: UserDefinedFunction

UDF to split input string via delimiter string and remove all empty substrings.

def string_substring(input: Column, start_position: Column, length: Column = lit(Int.MaxValue)): Column

Method to find substring of input string.

input: string on which to find substring.
start_position: 1 based starting position to find substring from.
length: total length of substring to be found.
returns: substring of input string

Annotations: @Py4JWhitelist()

def string_suffix(input: Column, len: Int): Column

Annotations: @Py4JWhitelist()

lazy val string_suffix: UserDefinedFunction

lazy val string_to_hex: UserDefinedFunction

final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes: AnyRef

lazy val testUDFF: UserDefinedFunction

lazy val test_characters_all: UserDefinedFunction

UDF to identify the number of characters in inputStr which are present in charFlag

def timezone_to_utc(timezone: String, time: Column): Column

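Method to convert the given time from the specified timezone to UTC (description inferred from the method name and parameters; confirm against the implementation).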

Annotations: @Py4JWhitelist()

def toString(): String

Definition Classes: AnyRef → Any

def today(): Column

Method to return integer value representing number of days to today from “1-1-1990”.

returns: integer value

Annotations: @Py4JWhitelist()

lazy val translate_bytes: UserDefinedFunction

UDF to return a string in the native character set made up of bytes from the given map.

UDF to return a string in the native character set made up of bytes from the given map. Each byte of the result is the value of map indexed by the character code of the corresponding byte of the input string str. The function returns NULL if any argument is NULL.

lazy val truncateMicroSeconds: UserDefinedFunction

UDF to truncate microseconds part of timestamp.

UDF to truncate microseconds part of timestamp. This is needed as abinitio and spark has some incompatibility in microseconds part of timestamp format.

lazy val type_info: UserDefinedFunction

lazy val type_info_with_includes: UserDefinedFunction

lazy val unique_identifier: UserDefinedFunction

lazy val url_encode_escapes: UserDefinedFunction

lazy val vector_avg: UserDefinedFunction

lazy val vector_stdev: UserDefinedFunction

final def wait(arg0: Long, arg1: Int): Unit

Definition Classes: AnyRef
Annotations: @throws( ... )

final def wait(arg0: Long): Unit

Definition Classes: AnyRef
Annotations: @throws( ... ) @native()

final def wait(): Unit

Definition Classes: AnyRef
Annotations: @throws( ... )

def warning(errorMsg: String): Column

Annotations: @Py4JWhitelist()

def windowSpec(partitionByExpr: Column = lit(1)): WindowSpec

Annotations: @Py4JWhitelist()

def windowSpecPrevRow(partitionByExpr: Column = lit(1)): WindowSpec

Annotations: @Py4JWhitelist()

lazy val writeIntegerToBytes: UserDefinedFunction

lazy val writeLongToBytes: UserDefinedFunction

def xmlStringToJsString(input: String): String

lazy val xmlToJSON: UserDefinedFunction

def xml_split(content: Column, dmlStr: String): Column

Annotations: @Py4JWhitelist()

def yyyyMMdd_to_YYYYJJJ(in_date: Column): Column

Converts yyyyMMdd to YYYYJJJ

in_date: date in yyyyMMdd format
returns: a date converted to YYYYJJJ

Annotations: @Py4JWhitelist()

def zip_eventInfo_arrays(column1: Column, column2: Column): Column

Method to zip two arrays with first one having event_type and second one having event_text

Annotations: @Py4JWhitelist()

object LongSequence

object RecordIterator extends Serializable

Packages

SparkFunctions

Companion object SparkFunctions

trait SparkFunctions extends AnyRef

Type Members

Value Members

Deprecated Value Members

Inherited from AnyRef

Inherited from Any

Ungrouped

Packages

SparkFunctions 

Companion object SparkFunctions

trait SparkFunctions extends AnyRef

Type Members

Value Members

Deprecated Value Members

Inherited from AnyRef

Inherited from Any

Ungrouped

SparkFunctions