MCPcopy Index your code
hub / github.com/lazyprogrammer/machine_learning_examples / ANN

Class ANN

rl3/es_flappy.py:78–126  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

76 return x * (x > 0)
77
class ANN:
    """One-hidden-layer feed-forward network whose weights are set externally.

    The network maps an input of width ``D`` through a hidden layer of width
    ``M`` (activation ``f``) to ``K`` outputs passed through ``softmax``.
    Parameters can be exported to / imported from a single flat vector laid
    out as ``[W1 (D*M), b1 (M), W2 (M*K), b2 (K)]``.

    Note: ``__init__`` only records the layer sizes; the weight arrays do not
    exist until ``init()`` or ``set_params()`` has been called.
    """

    def __init__(self, D, M, K, f=relu):
        """Store the layer sizes and the hidden activation function.

        D: input dimensionality.
        M: number of hidden units.
        K: number of outputs.
        f: callable applied element-wise to the hidden pre-activations.
        """
        self.D = D
        self.M = M
        self.K = K
        self.f = f

    def init(self):
        """Randomly initialise the weights and zero the biases."""
        D, M, K = self.D, self.M, self.K
        # Standard-normal draws scaled by 1/sqrt(fan-in).
        self.W1 = np.random.randn(D, M) / np.sqrt(D)
        self.b1 = np.zeros(M)
        self.W2 = np.random.randn(M, K) / np.sqrt(M)
        self.b2 = np.zeros(K)

    def forward(self, X):
        """Return ``softmax(f(X.W1 + b1).W2 + b2)`` for a batch ``X``.

        ``X`` must support ``.dot`` with ``W1`` (i.e. have ``D`` columns).
        ``softmax`` is defined elsewhere in this file.
        """
        Z = self.f(X.dot(self.W1) + self.b1)
        return softmax(Z.dot(self.W2) + self.b2)

    def sample_action(self, x):
        """Return the index of the highest-scoring output for one state.

        ``x`` is assumed to be a single state of size ``(D,)``; it is promoted
        to ``(1, D)`` so that ``forward`` sees a batch.
        """
        X = np.atleast_2d(x)
        P = self.forward(X)
        p = P[0]  # the only row of the batch
        # Greedy choice. A stochastic policy would instead draw
        # np.random.choice(len(p), p=p).
        return np.argmax(p)

    def get_params(self):
        """Return all parameters as one flat array: W1, b1, W2, b2."""
        return np.concatenate(
            [self.W1.flatten(), self.b1, self.W2.flatten(), self.b2]
        )

    def get_params_dict(self):
        """Return the parameter arrays keyed by name (not copied)."""
        return {
            'W1': self.W1,
            'b1': self.b1,
            'W2': self.W2,
            'b2': self.b2,
        }

    def set_params(self, params):
        """Load parameters from a flat vector laid out as W1, b1, W2, b2.

        params: 1-D array-like (ndarray, list, tuple) holding exactly
            ``D*M + M + M*K + K`` values. For ndarray input the stored
            weights are slices of ``params`` rather than copies.

        Raises ValueError if ``params`` is not one-dimensional or has the
        wrong number of elements, leaving the current weights untouched.
        """
        D, M, K = self.D, self.M, self.K
        # asarray is a no-op for ndarrays and lets plain sequences be reshaped.
        flat = np.asarray(params)

        w1_end = D * M
        b1_end = w1_end + M
        w2_end = b1_end + M * K
        expected = w2_end + K
        if flat.ndim != 1 or flat.size != expected:
            raise ValueError(
                "params must be a flat vector of %d values, got shape %s"
                % (expected, flat.shape)
            )

        # Slice by explicit offsets; b2 is whatever follows W2, so it can
        # never silently overlap or skip entries the way a tail slice could.
        self.W1 = flat[:w1_end].reshape(D, M)
        self.b1 = flat[w1_end:b1_end]
        self.W2 = flat[b1_end:w2_end].reshape(M, K)
        self.b2 = flat[w2_end:]
127
128
129def evolution_strategy(

Callers 2

reward_function · Function · 0.70
es_flappy.py · File · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected