Class

za.co.absa.cobrix.cobol.reader.parameters

ReaderParameters

case class ReaderParameters(
  recordFormat: RecordFormat = FixedLength,
  isEbcdic: Boolean = true,
  isText: Boolean = false,
  ebcdicCodePage: String = "common",
  ebcdicCodePageClass: Option[String] = None,
  asciiCharset: String = "",
  fieldCodePage: Map[String, String] = Map.empty[String, String],
  isUtf16BigEndian: Boolean = true,
  floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM,
  variableSizeOccurs: Boolean = false,
  recordLength: Option[Int] = None,
  lengthFieldExpression: Option[String] = None,
  isRecordSequence: Boolean = false,
  bdw: Option[Bdw] = None,
  isRdwBigEndian: Boolean = false,
  isRdwPartRecLength: Boolean = false,
  rdwAdjustment: Int = 0,
  isIndexGenerationNeeded: Boolean = false,
  inputSplitRecords: Option[Int] = None,
  inputSplitSizeMB: Option[Int] = None,
  hdfsDefaultBlockSize: Option[Int] = None,
  startOffset: Int = 0,
  endOffset: Int = 0,
  fileStartOffset: Int = 0,
  fileEndOffset: Int = 0,
  generateRecordId: Boolean = false,
  schemaPolicy: SchemaRetentionPolicy = SchemaRetentionPolicy.KeepOriginal,
  stringTrimmingPolicy: StringTrimmingPolicy = StringTrimmingPolicy.TrimBoth,
  allowPartialRecords: Boolean = false,
  multisegment: Option[MultisegmentParameters] = None,
  commentPolicy: CommentPolicy = CommentPolicy(),
  strictSignOverpunch: Boolean = true,
  improvedNullDetection: Boolean = false,
  dropGroupFillers: Boolean = false,
  dropValueFillers: Boolean = true,
  fillerNamingPolicy: FillerNamingPolicy = FillerNamingPolicy.SequenceNumbers,
  nonTerminals: Seq[String] = Nil,
  occursMappings: Map[String, Map[String, Int]] = Map(),
  debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug,
  recordHeaderParser: Option[String] = None,
  recordExtractor: Option[String] = None,
  rhpAdditionalInfo: Option[String] = None,
  reAdditionalInfo: String = "",
  inputFileNameColumn: String = "",
  extendedMetadata: Boolean = false
) extends Product with Serializable

These are properties for customizing the mainframe binary data reader.
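
For illustration, a minimal sketch of constructing a reader configuration (the import path follows the package shown above; the overridden values, such as the code page and column name, are example values, not recommendations):

  import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters

  // Every parameter has a default, so this yields a fixed-length,
  // EBCDIC, "common" code page configuration:
  val defaults = ReaderParameters()

  // Override only the parameters that differ for a particular data set:
  val params = ReaderParameters(
    ebcdicCodePage = "cp037",          // a specific single-byte EBCDIC code page
    generateRecordId = true,           // prepend a record id field to each record
    inputFileNameColumn = "file_name"  // add the input file name to each record
  )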

recordFormat

Record format

isEbcdic

If true, the input data file encoding is EBCDIC; otherwise it is ASCII

isText

If true, line-ending characters (LF / CRLF) will be used as the record separator

ebcdicCodePage

Specifies what code page to use for EBCDIC to ASCII/Unicode conversions

ebcdicCodePageClass

An optional custom code page conversion class provided by a user

asciiCharset

A charset for ASCII data

fieldCodePage

Specifies a mapping between a field name and the code page
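
For example (a sketch; the field name and code page below are hypothetical, with field names as keys per the description above):

  // Decode FIELD-1 with a dedicated code page; all other fields use ebcdicCodePage:
  val params = ReaderParameters(fieldCodePage = Map("FIELD-1" -> "cp1047"))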

isUtf16BigEndian

If true, UTF-16 strings are considered big-endian.

floatingPointFormat

A format of floating-point numbers

variableSizeOccurs

If true, the size of OCCURS DEPENDING ON data will depend on the number of elements

recordLength

Specifies the record length, disregarding the record size defined by the copybook. Implies the file has a fixed record length.

lengthFieldExpression

The name of a field that contains the record length. Optional; if not set, the copybook record length will be used.
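
A short sketch contrasting the two length-related parameters (the field name is hypothetical):

  // Fixed 300-byte records, regardless of the size implied by the copybook:
  val fixedLen = ReaderParameters(recordLength = Some(300))

  // Records whose length is stored in a copybook field:
  val varLen = ReaderParameters(lengthFieldExpression = Some("RECORD-LENGTH"))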

isRecordSequence

Whether input files have 4-byte record length headers

bdw

Block descriptor word (if specified), for FB and VB record formats

isRdwBigEndian

If true, RDW headers are considered big-endian

isRdwPartRecLength

Whether the RDW counts itself as part of the record length

rdwAdjustment

Controls a mismatch between RDW and record length

isIndexGenerationNeeded

Whether to generate an index of the input file before processing

inputSplitRecords

The number of records to include in each partition. Note that mainframe records may have variable size, so inputSplitSizeMB is the recommended option

inputSplitSizeMB

The partition size to target. In certain circumstances the actual partition size may differ, but the library will make a best effort to hit the target

hdfsDefaultBlockSize

Default HDFS block size for the HDFS filesystem used. This value is used as the default split size if inputSplitSizeMB is not specified
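
Both split options above are plain constructor parameters; a sketch (the numbers are arbitrary examples):

  // Size-based splits, recommended for variable-length records:
  val bySize = ReaderParameters(inputSplitSizeMB = Some(100))

  // Record-count-based splits:
  val byCount = ReaderParameters(inputSplitRecords = Some(50000))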

startOffset

An offset to the start of the record in each binary data block.

endOffset

An offset from the end of the record to the end of the binary data block.

fileStartOffset

A number of bytes to skip at the beginning of each file

fileEndOffset

A number of bytes to skip at the end of each file

generateRecordId

If true, a record id field will be prepended to each record.

schemaPolicy

Specifies a policy to transform the input schema. The default policy is to keep the schema exactly as it is in the copybook.

stringTrimmingPolicy

Specifies if and how strings should be trimmed when parsed.

allowPartialRecords

If true, partial ASCII records can be parsed (for example, when a trailing LF character is missing)

multisegment

Parameters specific to reading multisegment files

commentPolicy

A comment truncation policy

improvedNullDetection

If true, string values that contain only zero bytes (0x0) will be considered null.

dropGroupFillers

If true, the parser will drop all FILLER fields, even GROUP FILLERs that have non-FILLER nested fields

dropValueFillers

If true, the parser will drop all value FILLER fields

fillerNamingPolicy

Specifies the strategy for renaming FILLER fields to make their names unique

nonTerminals

A list of non-terminals (GROUPS) to combine and parse as primitive fields

debugFieldsPolicy

Specifies whether debugging fields should be added and what they should contain (false, hex, raw).

recordHeaderParser

A parser used to parse record headers of data files

recordExtractor

An optional custom record extractor class provided by a user

rhpAdditionalInfo

An optional string passed to a custom record header parser

reAdditionalInfo

An optional string passed to a custom record extractor

inputFileNameColumn

A column name to add to the DataFrame. The column will contain the input file name for each record, similar to the 'input_file_name()' function
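
Because ReaderParameters is a case class, variants can be derived with copy; a sketch (the column name is an example):

  val base = ReaderParameters()

  // Derive a configuration that traces record provenance:
  val traced = base.copy(
    inputFileNameColumn = "source_file",  // add the input file name column
    generateRecordId = true               // and prepend record id fields
  )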

extendedMetadata

If true, the Spark schema will be generated with additional metadata (e.g. PICs and USAGE)

Linear Supertypes
Serializable, Serializable, Product, Equals, AnyRef, Any

Value Members

  1. val allowPartialRecords: Boolean

    If true, partial ASCII records can be parsed (for example, when a trailing LF character is missing)

  2. val asciiCharset: String

    A charset for ASCII data

  3. val bdw: Option[Bdw]

    Block descriptor word (if specified), for FB and VB record formats

  4. val commentPolicy: CommentPolicy

    A comment truncation policy

  5. val debugFieldsPolicy: DebugFieldsPolicy

    Specifies whether debugging fields should be added and what they should contain (false, hex, raw).

  6. val dropGroupFillers: Boolean

    If true, the parser will drop all FILLER fields, even GROUP FILLERs that have non-FILLER nested fields

  7. val dropValueFillers: Boolean

    If true, the parser will drop all value FILLER fields

  8. val ebcdicCodePage: String

    Specifies what code page to use for EBCDIC to ASCII/Unicode conversions

  9. val ebcdicCodePageClass: Option[String]

    An optional custom code page conversion class provided by a user

  10. val endOffset: Int

    An offset from the end of the record to the end of the binary data block.

  11. val extendedMetadata: Boolean

    If true, the Spark schema will be generated with additional metadata (e.g. PICs and USAGE)

  12. val fieldCodePage: Map[String, String]

    Specifies a mapping between a field name and the code page

  13. val fileEndOffset: Int

    A number of bytes to skip at the end of each file

  14. val fileStartOffset: Int

    A number of bytes to skip at the beginning of each file

  15. val fillerNamingPolicy: FillerNamingPolicy

    Specifies the strategy for renaming FILLER fields to make their names unique

  16. val floatingPointFormat: FloatingPointFormat

    A format of floating-point numbers

  17. val generateRecordId: Boolean

    If true, a record id field will be prepended to each record.

  18. val hdfsDefaultBlockSize: Option[Int]

    Default HDFS block size for the HDFS filesystem used. This value is used as the default split size if inputSplitSizeMB is not specified

  19. val improvedNullDetection: Boolean

    If true, string values that contain only zero bytes (0x0) will be considered null.

  20. val inputFileNameColumn: String

    A column name to add to the DataFrame. The column will contain the input file name for each record, similar to the 'input_file_name()' function

  21. val inputSplitRecords: Option[Int]

    The number of records to include in each partition. Note that mainframe records may have variable size, so inputSplitSizeMB is the recommended option

  22. val inputSplitSizeMB: Option[Int]

    The partition size to target. In certain circumstances the actual partition size may differ, but the library will make a best effort to hit the target

  23. val isEbcdic: Boolean

    If true, the input data file encoding is EBCDIC; otherwise it is ASCII

  24. val isIndexGenerationNeeded: Boolean

    Whether to generate an index of the input file before processing

  25. val isRdwBigEndian: Boolean

    If true, RDW headers are considered big-endian

  26. val isRdwPartRecLength: Boolean

    Whether the RDW counts itself as part of the record length

  27. val isRecordSequence: Boolean

    Whether input files have 4-byte record length headers

  28. val isText: Boolean

    If true, line-ending characters (LF / CRLF) will be used as the record separator

  29. val isUtf16BigEndian: Boolean

    If true, UTF-16 strings are considered big-endian.

  30. val lengthFieldExpression: Option[String]

    The name of a field that contains the record length. Optional; if not set, the copybook record length will be used.

  31. val multisegment: Option[MultisegmentParameters]

    Parameters specific to reading multisegment files

  32. val nonTerminals: Seq[String]

    A list of non-terminals (GROUPS) to combine and parse as primitive fields

  33. val occursMappings: Map[String, Map[String, Int]]

  34. val rdwAdjustment: Int

    Controls a mismatch between RDW and record length

  35. val reAdditionalInfo: String

    An optional string passed to a custom record extractor

  36. val recordExtractor: Option[String]

    An optional custom record extractor class provided by a user

  37. val recordFormat: RecordFormat

    Record format

  38. val recordHeaderParser: Option[String]

    A parser used to parse record headers of data files

  39. val recordLength: Option[Int]

    Specifies the record length, disregarding the record size defined by the copybook. Implies the file has a fixed record length.

  40. val rhpAdditionalInfo: Option[String]

    An optional string passed to a custom record header parser

  41. val schemaPolicy: SchemaRetentionPolicy

    Specifies a policy to transform the input schema. The default policy is to keep the schema exactly as it is in the copybook.

  42. val startOffset: Int

    An offset to the start of the record in each binary data block.

  43. val strictSignOverpunch: Boolean

  44. val stringTrimmingPolicy: StringTrimmingPolicy

    Specifies if and how strings should be trimmed when parsed.

  45. val variableSizeOccurs: Boolean

    If true, the size of OCCURS DEPENDING ON data will depend on the number of elements
