Perplexity is computed as \exp\left(-\frac{\sum_{w}{\log p(w)}}{N}\right), where N is the number of tokens in the corpus and p(w) sums, over all topics, the product of the word distribution given the topic and the corresponding document distribution over topics:

p(w)=\sum_{k}{p(k|d)\,p(w|k)}=\sum_{k}{\frac{{n}_{kw}+{\beta}_{w}}{{n}_{k}+\bar{\beta}}\cdot\frac{{n}_{kd}+{\alpha}_{k}}{\sum_{k}{{n}_{kd}}+\bar{\alpha}}}=\left(\sum_{k}{\frac{{\alpha}_{k}{\beta}_{w}+{n}_{kw}{\alpha}_{k}+{n}_{kd}{\beta}_{w}+{n}_{kw}{n}_{kd}}{{n}_{k}+\bar{\beta}}}\right)\frac{1}{\sum_{k}{{n}_{kd}}+\bar{\alpha}}
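A minimal sketch of this computation with dense NumPy count arrays; the argument names (`docs`, `n_kw`, `n_kd`, `n_k`, `alpha`, `beta`) are this sketch's assumptions, not identifiers from the code base:

```python
import numpy as np

def perplexity(docs, n_kw, n_kd, n_k, alpha, beta):
    """Corpus perplexity exp(-(sum log p(w)) / N) from trained counts.

    Assumed shapes: docs is a list of token-id lists, one per document;
    n_kw is (K, V) word-topic counts, n_kd is (D, K) doc-topic counts,
    n_k is (K,) topic totals, alpha is (K,), beta is (V,).
    """
    alpha_bar, beta_bar = alpha.sum(), beta.sum()
    log_p, n_tokens = 0.0, 0
    for d, doc in enumerate(docs):
        # p(k|d) is shared by every token of document d
        p_k_d = (n_kd[d] + alpha) / (n_kd[d].sum() + alpha_bar)
        for w in doc:
            # p(w) = sum_k p(k|d) * p(w|k)
            p_w_k = (n_kw[:, w] + beta[w]) / (n_k + beta_bar)
            log_p += np.log(p_k_d @ p_w_k)
            n_tokens += 1
    return float(np.exp(-log_p / n_tokens))
```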
Save the term-topic model.
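A hedged sketch of what saving could look like, assuming the model state is held in NumPy arrays; `save_term_topic_model`, `load_phi`, and the `.npz` format are placeholders invented for this example:

```python
import numpy as np

def save_term_topic_model(path, n_kw, n_k, beta):
    """Persist the word-topic counts and topic totals; nothing else is
    needed to rebuild p(w|k) at load time."""
    np.savez_compressed(path, n_kw=n_kw, n_k=n_k, beta=beta)

def load_phi(path):
    """Rebuild phi[k, w] = (n_kw + beta_w) / (n_k + beta_bar)."""
    m = np.load(path)
    return (m["n_kw"] + m["beta"]) / (m["n_k"] + m["beta"].sum())[:, None]
```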
The sampler composes a Gibbs sampler with a Metropolis-Hastings sampler; the time complexity of each sampling step is O(1).

1. Sample the word-related part of the standard LDA formula via the Gibbs sampler, formula (6) in the paper "LightLDA: Big Topic Models on Modest Computer Clusters":

\frac{{n}_{kw}^{-di}+{\beta}_{w}}{{n}_{k}^{-di}+\bar{\beta}}

2. Use the probability computed in step 1 as the proposal distribution q in Metropolis-Hastings sampling, with an asymmetric Dirichlet prior as presented in formula (3) of the paper "Rethinking LDA: Why Priors Matter"; the true conditional to accept against is (see the sketch after the definitions below):

\frac{{n}_{kw}^{-di}+{\beta}_{w}}{{n}_{k}^{-di}+\bar{\beta}}\cdot\frac{{n}_{kd}^{-di}+\bar{\alpha}\,\frac{{n}_{k}^{-di}+\acute{\alpha}}{\sum_{k}{{n}_{k}}+\bar{\acute{\alpha}}}}{\sum_{k}{{n}_{kd}^{-di}}+\bar{\alpha}}
where \bar{\beta}=\sum_{w}{{\beta}_{w}}, \bar{\alpha}=\sum_{k}{{\alpha}_{k}}, and \bar{\acute{\alpha}}=\sum_{k}{\acute{\alpha}}; {n}_{kd} is the number of tokens in doc d that belong to topic k, {n}_{kw} is the number of occurrences of word w that belong to topic k, and {n}_{k} is the number of tokens in the corpus that belong to topic k. The superscript -di denotes a count with the current token (token i of doc d) excluded.
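A minimal sketch of one such step, assuming dense NumPy count arrays; all argument names are this sketch's assumptions, `alpha_p` stands for the scalar \acute{\alpha}, and the proposal is drawn by direct normalization rather than the alias tables LightLDA uses to make the draw O(1):

```python
import numpy as np

def mh_step(d, w, s, n_kw, n_kd, n_k, beta, alpha_bar, alpha_p, alpha_p_bar, rng):
    """One word-proposal + Metropolis-Hastings step for a single token.

    Token i of document d has word id w and current topic s. Assumed
    shapes: n_kw (K, V), n_kd (D, K), n_k (K,), beta (V,).
    """
    K = n_k.shape[0]
    beta_bar = beta.sum()

    # Step 1: draw a candidate topic t from the word proposal
    # q(k) ~ (n_kw + beta_w) / (n_k + beta_bar), using full counts.
    q = (n_kw[:, w] + beta[w]) / (n_k + beta_bar)
    t = rng.choice(K, p=q / q.sum())
    if t == s:
        return s

    # "-di" counts: the current token contributes 1 to topic s only.
    def excl(x, k):
        return x[k] - (k == s)

    # Asymmetric prior term:
    # alpha_k = alpha_bar * (n_k^{-di} + alpha') / (sum_k n_k + alpha'_bar)
    def alpha_k(k):
        return alpha_bar * (excl(n_k, k) + alpha_p) / (n_k.sum() + alpha_p_bar)

    # True conditional p(k) up to a constant; the document-length
    # denominator is identical for s and t, so it cancels in the ratio.
    def p(k):
        return ((excl(n_kw[:, w], k) + beta[w]) / (excl(n_k, k) + beta_bar)
                * (excl(n_kd[d], k) + alpha_k(k)))

    # Step 2: accept t with probability min(1, p(t) q(s) / (p(s) q(t))).
    accept = min(1.0, p(t) * q[s] / (p(s) * q[t]))
    return t if rng.random() < accept else s
```

A caller would create `rng = np.random.default_rng()`, invoke `mh_step` for each token, and then decrement the counts for the old topic and increment them for the returned one.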