r"""Update the agent parameters according to the rewards accrued on the current episode. Returns: avg_reward (float) -- the average reward earned by the best `retain_prcnt` theta samples on the current episode."""
(self)
| 85 | |
| 86 | @abstractmethod |
| 87 | def update(self): |
| 88 | r""" |
| 89 | Update the agent parameters according to the rewards accrued on the |
| 90 | current episode. |
| 91 | |
| 92 | Returns |
| 93 | ------- |
| 94 | avg_reward : float |
| 95 | The average reward earned by the best `retain_prcnt` theta samples |
| 96 | on the current episode. |
| 97 | """ |
| 98 | raise NotImplementedError |
| 99 | |
| 100 | |
| 101 | class CrossEntropyAgent(AgentBase): |
no outgoing calls