Class LDA

numpy_ml/lda/lda.py:5–247 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

3
4
5	class LDA(object):
6	def __init__(self, T=10):
7	"""
8	Vanilla (non-smoothed) LDA model trained using variational EM.
9	Generates maximum-likelihood estimates for model paramters
10	`alpha` and `beta`.
11
12	Parameters
13	----------
14	T : int
15	Number of topics
16
17	Attributes
18	----------
19	D : int
20	Number of documents
21	N : list of length `D`
22	Number of words in each document
23	V : int
24	Number of unique word tokens across all documents
25	phi : :py:class:`ndarray <numpy.ndarray>` of shape `(D, N[d], T)`
26	Variational approximation to word-topic distribution
27	gamma : :py:class:`ndarray <numpy.ndarray>` of shape `(D, T)`
28	Variational approximation to document-topic distribution
29	alpha : :py:class:`ndarray <numpy.ndarray>` of shape `(1, T)`
30	Parameter for the Dirichlet prior on the document-topic distribution
31	beta : :py:class:`ndarray <numpy.ndarray>` of shape `(V, T)`
32	Word-topic distribution
33	"""
34	self.T = T
35
def _maximize_phi(self):
    """
    Optimize variational parameter phi

    For every word position `n` in every document `d`, set

        ϕ_{t, n} ∝ β_{t, w_n} e^( Ψ(γ_t) )

    and then rescale the entries for that word so they sum to 1 over
    the topics.

    Reads `self.D`, `self.N`, `self.T`, `self.beta`, `self.gamma` and
    `self.corpus`; writes into `self.phi` in place.

    Returns
    -------
    phi : same object as `self.phi`
        The updated variational parameters, where `phi[d][n, t]` is the
        weight of topic `t` for the `n`-th word of document `d`.
    """
    D = self.D
    N = self.N
    T = self.T

    phi = self.phi
    beta = self.beta
    gamma = self.gamma
    corpus = self.corpus

    for d in range(D):
        # The exponentiated term depends only on (d, t), not on the word
        # position, so evaluate it once per document instead of once per
        # word.
        # NOTE(review): assumes `dg` is a pure function of its arguments
        # (it is defined outside this block) -- confirm.
        topic_weights = [np.exp(dg(gamma, d, t)) for t in range(T)]

        for n in range(N[d]):
            # The word id is the same for every topic; look it up once.
            w_n = int(corpus[d][n])

            for t in range(T):
                phi[d][n, t] = beta[w_n, t] * topic_weights[t]

            # Normalize over topics
            phi[d][n, :] = phi[d][n, :] / np.sum(phi[d][n, :])
    return phi
59
60	def _maximize_gamma(self):
61	"""
62	Optimize variational parameter gamma

plot_unsmoothedFunction · 0.90

no outgoing calls

no test coverage detected