ExcelOptions

Options passed to org.apache.spark.sql.DataFrameReader and org.apache.spark.sql.DataFrameWriter for reading and writing Microsoft Excel files. Excel support is provided by the spark-excel project (see link below).

sheetName: Optional name of the Excel Sheet to read from/write to.
numLinesToSkip: Optional number of rows in the Excel spreadsheet to skip before any data is read. This option must not be set for writing.
startColumn: Optional first column in the specified Excel Sheet to read from (as string, e.g. B). This option must not be set for writing.
endColumn: Optional last column in the specified Excel Sheet to read from (as string, e.g. F).
rowLimit: Optional limit of the number of rows being returned on read. This is applied after numLinesToSkip.
useHeader: If true, the first row of the Excel sheet specifies the column names (default: true).
treatEmptyValuesAsNulls: If true, empty cells are parsed as null values (default: true).
inferSchema: If true, the schema of the Excel sheet is inferred automatically (default: true).
timestampFormat: A format string specifying the format to use when writing timestamps (default: dd-MM-yyyy HH:mm:ss).
dateFormat: A format string specifying the format to use when writing dates.
maxRowsInMemory: The number of rows that are stored in memory. If set, a streaming reader is used which can help with big files.
excerptSize: Sample size for schema inference.

See also: https://github.com/crealytics/spark-excel

Linear Supertypes

Serializable, Serializable, Product, Equals, AnyRef, Any

Instance Constructors

new ExcelOptions(sheetName: Option[String] = None, numLinesToSkip: Option[Int] = None, startColumn: Option[String] = None, endColumn: Option[String] = None, rowLimit: Option[Int] = None, useHeader: Boolean = true, treatEmptyValuesAsNulls: Option[Boolean] = Some(true), inferSchema: Option[Boolean] = Some(true), timestampFormat: Option[String] = Some("dd-MM-yyyy HH:mm:ss"), dateFormat: Option[String] = None, maxRowsInMemory: Option[Int] = None, excerptSize: Option[Int] = None)

sheetName
Optional name of the Excel Sheet to read from/write to.
numLinesToSkip
Optional number of rows in the excel spreadsheet to skip before any data is read. This option must not be set for writing.
startColumn
Optional first column in the specified Excel Sheet to read from (as string, e.g. B). This option must not be set for writing.
endColumn
Optional last column in the specified Excel Sheet to read from (as string, e.g. F).
rowLimit
Optional limit of the number of rows being returned on read. This is applied after numLinesToSkip.
useHeader
If true, the first row of the excel sheet specifies the column names (default: true).
treatEmptyValuesAsNulls
If true, empty cells are parsed as null values (default: true).
inferSchema
Infer the schema of the excel sheet automatically (default: true).
timestampFormat
A format string specifying the format to use when writing timestamps (default: dd-MM-yyyy HH:mm:ss).
dateFormat
A format string specifying the format to use when writing dates.
maxRowsInMemory
The number of rows that are stored in memory. If set, a streaming reader is used which can help with big files.
excerptSize
Sample size for schema inference.

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
val dateFormat: Option[String]

A format string specifying the format to use when writing dates.
val endColumn: Option[String]

Optional last column in the specified Excel Sheet to read from (as string, e.g. F).
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
val excerptSize: Option[Int]

Sample size for schema inference.
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getDataAddress: Option[String]
val inferSchema: Option[Boolean]

Infer the schema of the excel sheet automatically (default: true).
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
val maxRowsInMemory: Option[Int]

The number of rows that are stored in memory.
The number of rows that are stored in memory. If set, a streaming reader is used which can help with big files.
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
val numLinesToSkip: Option[Int]

Optional number of rows in the excel spreadsheet to skip before any data is read.
Optional number of rows in the excel spreadsheet to skip before any data is read. This option must not be set for writing.
val rowLimit: Option[Int]

Optional limit of the number of rows being returned on read.
Optional limit of the number of rows being returned on read. This is applied after numLinesToSkip.
val sheetName: Option[String]

Optional name of the Excel Sheet to read from/write to.
val startColumn: Option[String]

Optional first column in the specified Excel Sheet to read from (as string, e.g. B).
Optional first column in the specified Excel Sheet to read from (as string, e.g. B). This option must not be set for writing.
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
val timestampFormat: Option[String]

A format string specifying the format to use when writing timestamps (default: dd-MM-yyyy HH:mm:ss).
def toMap(schema: Option[StructType]): Map[String, Option[Any]]
val treatEmptyValuesAsNulls: Option[Boolean]

If true, empty cells are parsed as null values (default: true).
val useHeader: Boolean

If true, the first row of the excel sheet specifies the column names (default: true).
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Docs: object ExcelOptions | package dataobject

Instance Constructors

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

val dateFormat: Option[String]

val endColumn: Option[String]

final def eq(arg0: AnyRef): Boolean

val excerptSize: Option[Int]

def finalize(): Unit

final def getClass(): Class[_]

def getDataAddress: Option[String]

val inferSchema: Option[Boolean]

final def isInstanceOf[T0]: Boolean

val maxRowsInMemory: Option[Int]

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

val numLinesToSkip: Option[Int]

val rowLimit: Option[Int]

val sheetName: Option[String]

val startColumn: Option[String]

final def synchronized[T0](arg0: ⇒ T0): T0

val timestampFormat: Option[String]

def toMap(schema: Option[StructType]): Map[String, Option[Any]]

val treatEmptyValuesAsNulls: Option[Boolean]

val useHeader: Boolean

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped