| 76 | return x * (x > 0) |
| 77 | |
class ANN:
    """Feedforward network with one hidden layer and a softmax output.

    The parameters live in four NumPy arrays (``W1``, ``b1``, ``W2``,
    ``b2``) and can be read or written as a single flat vector through
    ``get_params`` / ``set_params``.

    The constructor only records the layer sizes; the parameter arrays do
    not exist until ``init`` or ``set_params`` has been called.
    """

    def __init__(self, D, M, K, f=relu):
        """Store the layer sizes and the hidden-layer activation.

        Args:
            D: Number of input features (rows of ``W1``).
            M: Number of hidden units.
            K: Number of outputs (columns of ``W2``).
            f: Callable applied to the hidden pre-activations.
        """
        self.D = D
        self.M = M
        self.K = K
        self.f = f

    def init(self):
        """Draw random weights and reset both bias vectors to zero."""
        D, M, K = self.D, self.M, self.K
        # Gaussian weights scaled by 1/sqrt(fan-in); biases start at zero.
        self.W1 = np.random.randn(D, M) / np.sqrt(D)
        self.b1 = np.zeros(M)
        self.W2 = np.random.randn(M, K) / np.sqrt(M)
        self.b2 = np.zeros(K)

    def forward(self, X):
        """Run the network on ``X`` and return the softmax of the output layer.

        Args:
            X: Array whose last axis has length ``D`` (``sample_action``
                passes an ``(N, D)`` batch).

        Returns:
            Whatever the module-level ``softmax`` returns for the output
            activations.
        """
        Z = self.f(X.dot(self.W1) + self.b1)
        return softmax(Z.dot(self.W2) + self.b2)

    def sample_action(self, x):
        """Return the index of the highest-probability output for state ``x``.

        Args:
            x: A single state; it is promoted to a one-row batch before the
                forward pass.

        Returns:
            The argmax over the first row of the network output.
        """
        # forward() works on batches, so promote the state to a 2-D array.
        X = np.atleast_2d(x)
        P = self.forward(X)
        p = P[0]  # probabilities for the single input row
        # Despite the method name the choice is greedy (argmax), not a
        # random draw from p.
        return np.argmax(p)

    def get_params(self):
        """Return all parameters as one flat array.

        The layout is ``W1`` (row-major), ``b1``, ``W2`` (row-major), ``b2``,
        which is the layout ``set_params`` expects.
        """
        return np.concatenate([self.W1.flatten(), self.b1, self.W2.flatten(), self.b2])

    def get_params_dict(self):
        """Return the parameter arrays keyed by name.

        The dictionary holds references to the live arrays, not copies.
        """
        return {
            'W1': self.W1,
            'b1': self.b1,
            'W2': self.W2,
            'b2': self.b2,
        }

    def set_params(self, params):
        """Load all parameters from a flat vector laid out as in ``get_params``.

        Args:
            params: One-dimensional sequence or array of length
                ``D*M + M + M*K + K``. Lists and tuples are accepted. When an
                ndarray is passed no copy is forced, so the stored arrays may
                share memory with it.

        Raises:
            ValueError: If ``params`` is not one-dimensional or does not have
                exactly the expected number of elements.
        """
        D, M, K = self.D, self.M, self.K

        # Plain lists have no .reshape; coerce once up front.
        params = np.asarray(params)

        expected = D * M + M + M * K + K
        if params.ndim != 1 or params.size != expected:
            raise ValueError(
                "expected a flat parameter vector of length %d, got shape %s"
                % (expected, params.shape)
            )

        # Cumulative end offsets of each piece inside the flat vector.
        w1_end = D * M
        b1_end = w1_end + M
        w2_end = b1_end + M * K

        self.W1 = params[:w1_end].reshape(D, M)
        self.b1 = params[w1_end:b1_end]
        self.W2 = params[b1_end:w2_end].reshape(M, K)
        # Slice forward from the offset rather than with [-K:], which would
        # return the entire vector when K == 0.
        self.b2 = params[w2_end:]
| 127 | |
| 128 | |
| 129 | def evolution_strategy( |
no outgoing calls
no test coverage detected