Constructs a single decision tree from the given dataset sample
Returns the argmax for this datum
Returns the argmax for this datum
Computes the contingency tables for all given features and dataset partition. For each feature and possible threshold (hence the double array), we store a distribution of datum labels that are <= the threshold (_1 in the tuple) or larger than the threshold (_2 in the tuple). This method does not consider 0 values! See updateContingencyTables for that.
Computes the value thresholds for all features in this dataset
Computes the value thresholds for all features in this dataset
The dataset
An array of thresholds (Double) for each feature in the dataset; feature indices are used for indexing
Computes the utility of the given feature
Computes the utility of the given feature using information gain
Computes IG for a given feature and threshold
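The IG computation for one feature/threshold split can be sketched as follows. This is an illustrative sketch, not the actual implementation: `entropy`, `infoGain`, and the label-count maps are assumed names, standing in for the real contingency-table types.

```scala
// Illustrative sketch of information gain for one feature/threshold split.
// Names (IGSketch, entropy, infoGain) are assumptions, not the actual API.
object IGSketch {
  // Entropy (in bits) of a label distribution given as counts
  def entropy(counts: Iterable[Double]): Double = {
    val total = counts.sum
    counts.filter(_ > 0).map { c =>
      val p = c / total
      -p * math.log(p) / math.log(2)
    }.sum
  }

  // IG = H(parent) minus the weighted average of the child entropies,
  // where the children are the <= threshold and > threshold partitions
  def infoGain(leCounts: Map[String, Double], gtCounts: Map[String, Double]): Double = {
    val parent = (leCounts.keySet ++ gtCounts.keySet).toSeq.map { label =>
      leCounts.getOrElse(label, 0.0) + gtCounts.getOrElse(label, 0.0)
    }
    val nLe = leCounts.values.sum
    val nGt = gtCounts.values.sum
    val n = nLe + nGt
    entropy(parent) -
      (nLe / n) * entropy(leCounts.values) -
      (nGt / n) * entropy(gtCounts.values)
  }
}
```

A split that perfectly separates two equally frequent labels yields the maximum gain of 1 bit.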
Constructs a job from the datums containing values of this feature smaller than or equal to the threshold
Constructs a job from the datums containing values of this feature larger than the threshold
Computes binCount-1 quantile values, such that the sequence of values is split into binCount bins
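One way to realize the quantile split described above is sketched below; this is a hypothetical helper, not the actual method signature, assuming boundaries are taken at evenly spaced rank positions of the sorted values.

```scala
// Illustrative sketch (hypothetical helper): compute binCount - 1 boundary
// values so the sorted values fall into binCount roughly equal-sized bins.
object QuantileSketch {
  def quantileThresholds(values: Seq[Double], binCount: Int): Seq[Double] = {
    val sorted = values.sorted
    (1 until binCount).map { i =>
      // rank position of the boundary between bin i-1 and bin i
      val idx = (i.toLong * sorted.length / binCount).toInt
      sorted(math.min(idx, sorted.length - 1))
    }.distinct // collapse repeated boundaries when values have ties
  }
}
```

For 8 values split into 4 bins this produces 3 thresholds, one at every second rank.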
Randomly picks selectedFeats features between 0 .. numFeats
Randomly picks selectedFeats features between 0 .. numFeats
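This per-node feature subsampling is the "random" in random forests. A minimal sketch, assuming a shuffle-and-take strategy and hypothetical names (`FeatureSampler`, `pickFeatures`):

```scala
import scala.util.Random

// Illustrative sketch (hypothetical helper): pick selectedFeats distinct
// feature indices uniformly at random from 0 until numFeats.
object FeatureSampler {
  def pickFeatures(numFeats: Int, selectedFeats: Int, rng: Random): Set[Int] = {
    require(selectedFeats <= numFeats)
    // shuffle all indices, then keep the first selectedFeats of them
    rng.shuffle((0 until numFeats).toList).take(selectedFeats).toSet
  }
}
```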
Saves to writer.
Saves to writer. Does NOT close the writer
Saves the current model to a file
Saves the current model to a file
Returns the scores of all possible labels for this datum. Convention: if the classifier can return probabilities, these must be probabilities
Returns the scores of all possible labels for this datum. Convention: if the classifier can return probabilities, these must be probabilities
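The probability convention amounts to normalizing the per-label scores so they sum to 1. A sketch under that assumption (`ScoreSketch` and `scoresOf` are hypothetical names; in a forest the raw scores would come from the trees' votes):

```scala
// Illustrative sketch of the scoring convention above: raw per-label scores
// are normalized into a probability distribution.
object ScoreSketch {
  def scoresOf(votes: Map[String, Double]): Map[String, Double] = {
    val total = votes.values.sum
    votes.map { case (label, v) => label -> v / total } // sums to 1
  }
}
```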
Trains a classifier using a CounterDataset (better for computing feature utility)
Trains a classifier, using only the datums specified in indices; indices is useful for bagging
Trains a classifier, using only the datums specified in indices; indices is useful for bagging
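Bagging builds each tree from a bootstrap sample of the dataset, which is exactly what an indices array expresses. A minimal sketch, assuming sampling with replacement (`BaggingSketch` and `bootstrapIndices` are hypothetical names; the real trainer receives such an indices array):

```scala
import scala.util.Random

// Illustrative sketch of bagging: a bootstrap sample is an array of datum
// indices drawn with replacement, so some datums repeat and others are
// left out for that tree.
object BaggingSketch {
  def bootstrapIndices(datasetSize: Int, rng: Random): Array[Int] =
    Array.fill(datasetSize)(rng.nextInt(datasetSize))
}
```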
Trains the classifier on the given dataset; spans is useful during cross validation
Trains the classifier on the given dataset; spans is useful during cross validation
An in-house implementation of random forests.
User: mihais
Date: 11/23/15
Last Modified: Update for Scala 2.12: fork join changes.