LDA

Instance Constructors

new LDA(corpus: Graph[VD, ED], numTopics: Int, numTerms: Int, numDocs: Long, numTokens: Long, alpha: Float, beta: Float, alphaAS: Float, storageLevel: StorageLevel)

Abstract Value Members

abstract def sampleTokens(corpus: Graph[VD, ED], totalTopicCounter: DenseVector[Count], pseudoIter: Int, numTokens: Long, numTopics: Int, numTerms: Int, alpha: Float, alphaAS: Float, beta: Float): Graph[VD, ED]

Attributes
protected

Concrete Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def docVertices: VertexRDD[VD]
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getCorpus: Graph[VD, ED]
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def mergeDuplicateTopic(threshold: Double = 0.95D): Map[Int, Int]
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def perplexity(): Double

The probability of a word is the product of the word distribution over all topics and the corresponding document distribution over all topics, summed over the topics:

p(w) = \sum_{k} p(k|d) \, p(w|k) = \sum_{k} \frac{n_{kw}+\beta_{w}}{n_{k}+\bar{\beta}} \cdot \frac{n_{kd}+\alpha_{k}}{\sum{n_{k}}+\bar{\alpha}} = \sum_{k} \frac{\alpha_{k}\beta_{w} + n_{kw}\alpha_{k} + n_{kd}\beta_{w} + n_{kw}n_{kd}}{n_{k}+\bar{\beta}} \cdot \frac{1}{\sum{n_{k}}+\bar{\alpha}}

The perplexity is \exp\left(-\frac{\sum{\log p(w)}}{N}\right), where N is the number of tokens in the corpus.
def runGibbsSampling(totalIter: Int, ChkptInterval: Int = 0, calcPerplexity: Boolean = false): Unit
def saveModel(saveIter: Int = 1): DistributedLDAModel

Saves the term-topic related model.
saveIter
the number of iterations whose models are averaged to produce the saved model
def setAlpha(alpha: Float): LDA.this.type
def setAlphaAS(alphaAS: Float): LDA.this.type
def setBeta(beta: Float): LDA.this.type
def setSeed(newSeed: Int): LDA.this.type
def setStorageLevel(storageLevel: StorageLevel): LDA.this.type
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def termVertices: VertexRDD[VD]
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

abstract class LDA extends Serializable with Logging

Instance Constructors

new LDA(corpus: Graph[VD, ED], numTopics: Int, numTerms: Int, numDocs: Long, numTokens: Long, alpha: Float, beta: Float, alphaAS: Float, storageLevel: StorageLevel)

Abstract Value Members

abstract def sampleTokens(corpus: Graph[VD, ED], totalTopicCounter: DenseVector[Count], pseudoIter: Int, numTokens: Long, numTopics: Int, numTerms: Int, alpha: Float, alphaAS: Float, beta: Float): Graph[VD, ED]

Concrete Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

def docVertices: VertexRDD[VD]

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def getCorpus: Graph[VD, ED]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

def isTraceEnabled(): Boolean

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def logError(msg: ⇒ String): Unit

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

def logInfo(msg: ⇒ String): Unit

def logName: String

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

def logTrace(msg: ⇒ String): Unit

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

def logWarning(msg: ⇒ String): Unit

def mergeDuplicateTopic(threshold: Double = 0.95D): Map[Int, Int]

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def perplexity(): Double

p(w) = \sum_{k} \frac{\alpha_{k}\beta_{w} + n_{kw}\alpha_{k} + n_{kd}\beta_{w} + n_{kw}n_{kd}}{n_{k}+\bar{\beta}} \cdot \frac{1}{\sum{n_{k}}+\bar{\alpha}}; the perplexity is \exp\left(-\frac{\sum{\log p(w)}}{N}\right), where N is the number of tokens in the corpus.

def runGibbsSampling(totalIter: Int, ChkptInterval: Int = 0, calcPerplexity: Boolean = false): Unit

def saveModel(saveIter: Int = 1): DistributedLDAModel

def setAlpha(alpha: Float): LDA.this.type

def setAlphaAS(alphaAS: Float): LDA.this.type

def setBeta(beta: Float): LDA.this.type

def setSeed(newSeed: Int): LDA.this.type

def setStorageLevel(storageLevel: StorageLevel): LDA.this.type

final def synchronized[T0](arg0: ⇒ T0): T0

def termVertices: VertexRDD[VD]

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Logging

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped