LDA

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
def incrementalTrain(docs: RDD[BOW], computedModel: LocalLDAModel, totalIter: Int, LDAAlgorithm: String, partStrategy: String, chkptInterval: Int, calcPerplexity: Boolean, storageLevel: StorageLevel): DistributedLDAModel
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
def train(docs: RDD[BOW], totalIter: Int, numTopics: Int, alpha: Float, beta: Float, alphaAS: Float, LDAAlgorithm: String, partStrategy: String, chkptInterval: Int, calcPerplexity: Boolean, storageLevel: StorageLevel): DistributedLDAModel

LDA training
LDA training
docs
RDD of documents, which are term (word) count vectors paired with IDs. The term count vectors are "bags of words" with a fixed-size vocabulary (where the vocabulary size is the length of the vector). Document IDs must be unique and >= 0.
totalIter
the number of iterations
numTopics
the number of topics (5000+ for large data)
alpha
recommended value is 5.0 / numTopics
beta
recommended to be in the range 0.001 to 0.1
alphaAS
recommended to be in the range 0.01 to 1.0
LDAAlgorithm
which LDA sampling algorithm to use; lightlda is not recommended for short text
partStrategy
which partition strategy to use when repartitioning the graph
storageLevel
StorageLevel that the LDA Model RDD uses
returns
DistributedLDAModel
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Docs: class LDA | package clustering

object LDA extends Serializable

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

def incrementalTrain(docs: RDD[BOW], computedModel: LocalLDAModel, totalIter: Int, LDAAlgorithm: String, partStrategy: String, chkptInterval: Int, calcPerplexity: Boolean, storageLevel: StorageLevel): DistributedLDAModel

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

def train(docs: RDD[BOW], totalIter: Int, numTopics: Int, alpha: Float, beta: Float, alphaAS: Float, LDAAlgorithm: String, partStrategy: String, chkptInterval: Int, calcPerplexity: Boolean, storageLevel: StorageLevel): DistributedLDAModel

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped