MCPcopy Index your code
hub / github.com/ddbourgin/numpy-ml / LDA

Class LDA

numpy_ml/lda/lda.py:5–247  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

3
4
5class LDA(object):
6 def __init__(self, T=10):
7 """
8 Vanilla (non-smoothed) LDA model trained using variational EM.
9 Generates maximum-likelihood estimates for model paramters
10 `alpha` and `beta`.
11
12 Parameters
13 ----------
14 T : int
15 Number of topics
16
17 Attributes
18 ----------
19 D : int
20 Number of documents
21 N : list of length `D`
22 Number of words in each document
23 V : int
24 Number of unique word tokens across all documents
25 phi : :py:class:`ndarray <numpy.ndarray>` of shape `(D, N[d], T)`
26 Variational approximation to word-topic distribution
27 gamma : :py:class:`ndarray <numpy.ndarray>` of shape `(D, T)`
28 Variational approximation to document-topic distribution
29 alpha : :py:class:`ndarray <numpy.ndarray>` of shape `(1, T)`
30 Parameter for the Dirichlet prior on the document-topic distribution
31 beta : :py:class:`ndarray <numpy.ndarray>` of shape `(V, T)`
32 Word-topic distribution
33 """
34 self.T = T
35
def _maximize_phi(self):
    """
    Optimize the variational parameter phi.

    For every word position `n` of every document `d`, each topic `t`
    receives the unnormalized weight

        ϕ_{t, n} ∝ β_{t, w_n} e^( Ψ(γ_t) )

    computed as ``beta[w_n, t] * np.exp(dg(gamma, d, t))``, after which
    the row ``phi[d][n, :]`` is divided by its sum so that it is
    normalized over topics.

    Reads `self.D`, `self.N`, `self.T`, `self.phi`, `self.beta`,
    `self.gamma` and `self.corpus`.

    Returns
    -------
    phi
        The same object as `self.phi`; its per-document entries
        ``phi[d]`` are overwritten in place.
    """
    D = self.D
    N = self.N
    T = self.T

    phi = self.phi
    beta = self.beta
    gamma = self.gamma
    corpus = self.corpus

    for d in range(D):
        phi_d = phi[d]
        doc = corpus[d]
        for n in range(N[d]):
            # The word id depends only on (d, n), so look it up once per
            # word instead of once per topic.
            w_n = int(doc[n])
            for t in range(T):
                # NOTE(review): `dg` is defined outside this block;
                # presumably it supplies the Ψ(γ_t) term from the formula
                # above -- confirm against its definition.
                phi_d[n, t] = beta[w_n, t] * np.exp(dg(gamma, d, t))

            # Normalize over topics
            phi_d[n, :] = phi_d[n, :] / np.sum(phi_d[n, :])
    return phi
59
60 def _maximize_gamma(self):
61 """
62 Optimize variational parameter gamma

Callers 1

plot_unsmoothed · Function · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected