hub / github.com/karpathy/neuraltalk / step

Method step

imagernn/solver.py:15–87 · view source on GitHub ↗

Perform a single batch update. Takes as input: a batch of data (X), a model (W), and a cost function that takes the batch and the model.

(self, batch, model, cost_function, **kwargs)

Source from the content-addressed store, hash-verified

13	self.step_cache2_ = {} # might need this
14
15	def step(self, batch, model, cost_function, **kwargs):
16	"""
17	perform a single batch update. Takes as input:
18	- batch of data (X)
19	- model (W)
20	- cost function which takes batch, model
21	"""
22
23	learning_rate = kwargs.get('learning_rate', 0.0)
24	update = kwargs.get('update', model.keys())
25	grad_clip = kwargs.get('grad_clip', -1)
26	solver = kwargs.get('solver', 'vanilla')
27	momentum = kwargs.get('momentum', 0)
28	smooth_eps = kwargs.get('smooth_eps', 1e-8)
29	decay_rate = kwargs.get('decay_rate', 0.999)
30
31	if not (solver == 'vanilla' and momentum == 0):
32	# lazily make sure we initialize step cache if needed
33	for u in update:
34	if not u in self.step_cache_:
35	self.step_cache_[u] = np.zeros(model[u].shape)
36	if solver == 'adadelta':
37	self.step_cache2_[u] = np.zeros(model[u].shape) # adadelta needs one more cache
38
39	# compute cost and gradient
40	cg = cost_function(batch, model)
41	cost = cg['cost']
42	grads = cg['grad']
43	stats = cg['stats']
44
45	# clip gradients if needed, simplest possible version
46	# todo later: maybe implement the gradient direction conserving version
47	if grad_clip > 0:
48	for p in update:
49	if p in grads:
50	grads[p] = np.minimum(grads[p], grad_clip)
51	grads[p] = np.maximum(grads[p], -grad_clip)
52
53	# perform parameter update
54	for p in update:
55	if p in grads:
56
57	if solver == 'vanilla': # vanilla sgd, optional with momentum
58	if momentum > 0:
59	dx = momentum * self.step_cache_[p] - learning_rate * grads[p]
60	self.step_cache_[p] = dx
61	else:
62	dx = - learning_rate * grads[p]
63
64	elif solver == 'rmsprop':
65	self.step_cache_[p] = self.step_cache_[p] * decay_rate + (1.0 - decay_rate) * grads[p] ** 2
66	dx = -(learning_rate * grads[p]) / np.sqrt(self.step_cache_[p] + smooth_eps)
67
68	elif solver == 'adagrad':
69	self.step_cache_[p] += grads[p] ** 2
70	dx = -(learning_rate * grads[p]) / np.sqrt(self.step_cache_[p] + smooth_eps)
71
72	elif solver == 'adadelta':

Callers 1

mainFunction · 0.95

Calls

no outgoing calls

Tested by

no test coverage detected