(self)
| 1426 | self._init_params() |
| 1427 | |
def _init_params(self):
    """
    Build the agent's encoder, policies, parameters, and bookkeeping state.

    The action space must be discrete. If the environment info reports
    continuous observations, an observation encoder is obtained from
    `tile_state_space` and forwarded to `_create_2num_dicts`; otherwise
    ``None`` is forwarded.

    After this call the following attributes are (re)assigned:
    ``behavior_policy``, ``target_policy``, ``parameters["Q"]``,
    ``parameters["model"]``, ``derived_variables``, ``hyperparameters``,
    and ``episode_history``.

    Raises
    ------
    AssertionError
        If ``self.env_info["continuous_actions"]`` is truthy.
    """
    env_info = self.env_info
    assert not env_info["continuous_actions"], "Action space must be discrete"

    # Only continuous observation spaces need an encoder; the second value
    # returned by `tile_state_space` is not used here.
    if env_info["continuous_observations"]:
        obs_encoder, _ = tile_state_space(
            self.env,
            self.env_info,
            self.n_tilings,
            state_action=False,
            obs_max=self.obs_max,
            obs_min=self.obs_min,
            grid_size=self.grid_dims,
        )
    else:
        obs_encoder = None

    self._create_2num_dicts(obs_encoder=obs_encoder)

    # Behavior and target policies share the same epsilon-soft policy.
    shared_policy = self._epsilon_soft_policy
    self.behavior_policy = shared_policy
    self.target_policy = shared_policy

    # Q entries for unseen keys are drawn lazily from `np.random.rand`.
    self.parameters["Q"] = defaultdict(np.random.rand)
    self.parameters["model"] = EnvModel()

    # Per-key "steps since last visit" counter: unseen keys default to 0,
    # unless `q_plus` is set, in which case they default to a fresh
    # `np.random.rand()` draw.
    if self.q_plus:
        steps_since_last_visit = defaultdict(np.random.rand)
    else:
        steps_since_last_visit = defaultdict(lambda: 0)

    # Mutable bookkeeping for the agent.
    self.derived_variables = {
        "episode_num": 0,
        "sweep_queue": {},
        "visited": set(),
        "steps_since_last_visit": steps_since_last_visit,
    }

    # Every hyperparameter key other than "agent" mirrors the instance
    # attribute of the same name; the tuple order fixes the dict key order.
    mirrored_attrs = (
        "lr",
        "q_plus",
        "obs_max",
        "obs_min",
        "epsilon",
        "n_tilings",
        "grid_dims",
        "explore_weight",
        "temporal_discount",
        "n_simulated_actions",
    )
    self.hyperparameters = {"agent": "DynaAgent"}
    for attr_name in mirrored_attrs:
        self.hyperparameters[attr_name] = getattr(self, attr_name)

    # Per-episode record of visited state-action pairs and rewards.
    self.episode_history = {"state_actions": [], "rewards": []}
| 1479 | |
| 1480 | def act(self, obs): |
| 1481 | r""" |
no test coverage detected