MCP · copy · Index your code
hub / github.com/lazyprogrammer/machine_learning_examples / Glove

Class Glove

nlp_class2/glove.py:31–236  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

29# 40 files --> half right but 50 is better
30
31class Glove:
32 def __init__(self, D, V, context_sz):  # constructor: only records the three settings below, no other work
33 self.D = D  # read back by fit() as D; presumably the embedding dimensionality - TODO confirm
34 self.V = V  # fit() allocates the V x V co-occurrence matrix from this and indexes it by word id
35 self.context_sz = context_sz  # window radius fit() uses to pick context positions around each token
36
37 def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100, alpha=0.75, epochs=10, gd=False):
38 # build co-occurrence matrix
39 # paper calls it X, so we will call it X, instead of calling
40 # the training data X
41 # TODO: would it be better to use a sparse matrix?
42 t0 = datetime.now()
43 V = self.V
44 D = self.D
45
46 if not os.path.exists(cc_matrix):
47 X = np.zeros((V, V))
48 N = len(sentences)
49 print("number of sentences to process:", N)
50 it = 0
51 for sentence in sentences:
52 it += 1
53 if it % 10000 == 0:
54 print("processed", it, "/", N)
55 n = len(sentence)
56 for i in range(n):
57 # i is not the word index!!!
58 # j is not the word index!!!
59 # i just points to which element of the sequence (sentence) we're looking at
60 wi = sentence[i]
61
62 start = max(0, i - self.context_sz)
63 end = min(n, i + self.context_sz)
64
65 # we can either choose only one side as context, or both
66 # here we are doing both
67
68 # make sure "start" and "end" tokens are part of some context
69 # otherwise their f(X) will be 0 (denominator in bias update)
70 if i - self.context_sz < 0:
71 points = 1.0 / (i + 1)
72 X[wi,0] += points
73 X[0,wi] += points
74 if i + self.context_sz > n:
75 points = 1.0 / (n - i)
76 X[wi,1] += points
77 X[1,wi] += points
78
79 # left side
80 for j in range(start, i):
81 wj = sentence[j]
82 points = 1.0 / (i - j) # this is +ve
83 X[wi,wj] += points
84 X[wj,wi] += points
85
86 # right side
87 for j in range(i + 1, end):
88 wj = sentence[j]

Callers 1

main · Function · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected