hub / github.com/lazyprogrammer/machine_learning_examples / Glove

Class Glove

nlp_class2/glove.py:31–236 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

29	# 40 files --> half right but 50 is better
30
31	class Glove:
32	def __init__(self, D, V, context_sz):
33	self.D = D
34	self.V = V
35	self.context_sz = context_sz
36
37	def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100, alpha=0.75, epochs=10, gd=False):
38	# build co-occurrence matrix
39	# paper calls it X, so we will call it X, instead of calling
40	# the training data X
41	# TODO: would it be better to use a sparse matrix?
42	t0 = datetime.now()
43	V = self.V
44	D = self.D
45
46	if not os.path.exists(cc_matrix):
47	X = np.zeros((V, V))
48	N = len(sentences)
49	print("number of sentences to process:", N)
50	it = 0
51	for sentence in sentences:
52	it += 1
53	if it % 10000 == 0:
54	print("processed", it, "/", N)
55	n = len(sentence)
56	for i in range(n):
57	# i is not the word index!!!
58	# j is not the word index!!!
59	# i just points to which element of the sequence (sentence) we're looking at
60	wi = sentence[i]
61
62	start = max(0, i - self.context_sz)
63	end = min(n, i + self.context_sz)
64
65	# we can either choose only one side as context, or both
66	# here we are doing both
67
68	# make sure "start" and "end" tokens are part of some context
69	# otherwise their f(X) will be 0 (denominator in bias update)
70	if i - self.context_sz < 0:
71	points = 1.0 / (i + 1)
72	X[wi,0] += points
73	X[0,wi] += points
74	if i + self.context_sz > n:
75	points = 1.0 / (n - i)
76	X[wi,1] += points
77	X[1,wi] += points
78
79	# left side
80	for j in range(start, i):
81	wj = sentence[j]
82	points = 1.0 / (i - j) # this is +ve
83	X[wi,wj] += points
84	X[wj,wi] += points
85
86	# right side
87	for j in range(i + 1, end):
88	wj = sentence[j]

Callers 1

mainFunction · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected