(self, obv)
| 212 | self.optimizer = tf.optimizers.Adam(learning_rate=lr) |
| 213 | |
def get_action(self, obv):
    """Pick an action index for a single observation, epsilon-greedily.

    When ``args.train`` is truthy, a uniformly random index in
    ``[0, out_dim)`` is returned with probability ``epsilon(self.niter)``.
    Otherwise the observation is given a leading axis, cast to float32,
    multiplied by ``ob_scale`` and passed to ``self._qvalues_func``; the
    exponentiated output is weighted by ``vrange``, summed over the last
    axis, and the arg-max along axis 1 of the first row is returned.

    Args:
        obv: one observation, without a leading batch axis (the axis is
            added here via ``np.expand_dims(obv, 0)``).

    Returns:
        int: the selected action index. Both branches return a built-in
        ``int``; previously the greedy branch leaked a NumPy integer
        scalar while the exploration branch returned ``int``.
    """
    eps = epsilon(self.niter)
    if args.train and random.random() < eps:
        # Exploration: uniform random action index.
        return int(random.random() * out_dim)

    # Greedy: add the leading axis, cast and scale before the network call.
    batch = np.expand_dims(obv, 0).astype('float32') * ob_scale
    # NOTE(review): the exp() suggests _qvalues_func returns log-probabilities
    # over the value support `vrange` -- confirm against its definition.
    qdist = np.exp(self._qvalues_func(batch).numpy())
    # Weight by vrange and collapse the last axis to one value per action.
    qvalues = (qdist * vrange).sum(-1)
    # Cast so callers get the same type as the exploration branch.
    return int(qvalues.argmax(1)[0])
| 223 | |
| 224 | @tf.function |
| 225 | def _qvalues_func(self, obv): |
no test coverage detected