Splitter

Type Members

case class TrainTestDataSet[T](training: DataSet[T], testing: DataSet[T])(implicit evidence$1: TypeInformation[T], evidence$2: ClassTag[T]) extends Product with Serializable
case class TrainTestHoldoutDataSet[T](training: DataSet[T], testing: DataSet[T], holdout: DataSet[T])(implicit evidence$3: TypeInformation[T], evidence$4: ClassTag[T]) extends Product with Serializable

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def kFoldSplit[T](input: DataSet[T], kFolds: Int, seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): Array[TrainTestDataSet[T]]

Split a DataSet into an array of TrainTest DataSets
Split a DataSet into an array of TrainTest DataSets
input
DataSet to be split
kFolds
The number of TrainTest DataSets to be returned. Each 'testing' set will be 1/k of the dataset, randomly sampled; the training set will be the remainder of the dataset. The DataSet is split into kFolds first, so that no observation will occur in multiple folds.
seed
Random number generator seed.
returns
An array of TrainTestDataSets
def multiRandomSplit[T](input: DataSet[T], fracArray: Array[Double], seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): Array[DataSet[T]]

Split a DataSet by the probability fraction of each element of a vector.
Split a DataSet by the probability fraction of each element of a vector.
input
DataSet to be split
fracArray
An array of PROPORTIONS for splitting the DataSet. Unlike the randomSplit function, numbers greater than 1 do not lead to oversampling. The number of splits is dictated by the length of this array. The numbers are normalized, e.g. Array(1.0, 2.0) would yield two data sets with a 33/67% split.
seed
Random number generator seed.
returns
An array of DataSets whose length is equal to the length of fracArray
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def randomSplit[T](input: DataSet[T], fraction: Double, precise: Boolean = false, seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): Array[DataSet[T]]

Split a DataSet by the probability fraction of each element.
Split a DataSet by the probability fraction of each element.
input
DataSet to be split
fraction
Probability that each element is chosen; should be in [0,1]. This fraction refers to the first element in the resulting array.
precise
Sampling by default is random and can result in slightly lop-sided sample sets. When precise is true, equal sample set sizes are forced; however, this is somewhat less efficient.
seed
Random number generator seed.
returns
An array of two datasets
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
def trainTestHoldoutSplit[T](input: DataSet[T], fracTuple: (Double, Double, Double) = (0.6,0.3,0.1), seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): TrainTestHoldoutDataSet[T]

A wrapper for multiRandomSplit that yields a TrainTestHoldoutDataSet
A wrapper for multiRandomSplit that yields a TrainTestHoldoutDataSet
input
DataSet to be split
fracTuple
A tuple of three doubles, where the first element specifies the size of the training set, the second element the testing set, and the third element is the holdout set. These are proportional and will be normalized internally.
seed
Random number generator seed.
returns
A TrainTestHoldoutDataSet
def trainTestSplit[T](input: DataSet[T], fraction: Double = 0.6, precise: Boolean = false, seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): TrainTestDataSet[T]

A wrapper for randomSplit that yields a TrainTestDataSet
A wrapper for randomSplit that yields a TrainTestDataSet
input
DataSet to be split
fraction
Probability that each element is chosen; should be in [0,1]. This fraction refers to the training element of the resulting TrainTestDataSet.
precise
Sampling by default is random and can result in slightly lop-sided sample sets. When precise is true, equal sample set sizes are forced; however, this is somewhat less efficient.
seed
Random number generator seed.
returns
A TrainTestDataSet
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package preprocessing

object Splitter

Type Members

case class TrainTestDataSet[T](training: DataSet[T], testing: DataSet[T])(implicit evidence$1: TypeInformation[T], evidence$2: ClassTag[T]) extends Product with Serializable

case class TrainTestHoldoutDataSet[T](training: DataSet[T], testing: DataSet[T], holdout: DataSet[T])(implicit evidence$3: TypeInformation[T], evidence$4: ClassTag[T]) extends Product with Serializable

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

def kFoldSplit[T](input: DataSet[T], kFolds: Int, seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): Array[TrainTestDataSet[T]]

def multiRandomSplit[T](input: DataSet[T], fracArray: Array[Double], seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): Array[DataSet[T]]

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def randomSplit[T](input: DataSet[T], fraction: Double, precise: Boolean = false, seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): Array[DataSet[T]]

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

def trainTestHoldoutSplit[T](input: DataSet[T], fracTuple: (Double, Double, Double) = (0.6,0.3,0.1), seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): TrainTestHoldoutDataSet[T]

def trainTestSplit[T](input: DataSet[T], fraction: Double = 0.6, precise: Boolean = false, seed: Long = Utils.RNG.nextLong())(implicit arg0: TypeInformation[T], arg1: ClassTag[T]): TrainTestDataSet[T]

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from AnyRef

Inherited from Any

Ungrouped