MCPcopy Index your code
hub / github.com/ddbourgin/numpy-ml / _init_params

Method _init_params

numpy_ml/rl_models/agents.py:1428–1478  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

1426 self._init_params()
1427
def _init_params(self):
    """
    Initialize the agent's policies, parameters, bookkeeping, and
    hyperparameter record.

    The environment must have a discrete action space. If observations are
    continuous, an observation encoder is obtained from `tile_state_space`
    and handed to `_create_2num_dicts`; otherwise ``None`` is passed.
    """
    env_info = self.env_info
    assert not env_info["continuous_actions"], "Action space must be discrete"

    if env_info["continuous_observations"]:
        # only the first element of the returned pair is used here
        obs_encoder, _ = tile_state_space(
            self.env,
            self.env_info,
            self.n_tilings,
            state_action=False,
            obs_max=self.obs_max,
            obs_min=self.obs_min,
            grid_size=self.grid_dims,
        )
    else:
        obs_encoder = None

    self._create_2num_dicts(obs_encoder=obs_encoder)

    # behavior and target policies share the same epsilon-soft policy
    policy = self._epsilon_soft_policy
    self.behavior_policy = policy
    self.target_policy = policy

    # Q entries default to a fresh `np.random.rand()` draw on first access
    self.parameters["Q"] = defaultdict(np.random.rand)
    self.parameters["model"] = EnvModel()

    # with `q_plus` set, unseen keys in the visit counter default to a
    # random draw; otherwise they default to 0
    if self.q_plus:
        steps_since_last_visit = defaultdict(
            np.random.rand,
        )
    else:
        steps_since_last_visit = defaultdict(lambda: 0)

    # per-run bookkeeping
    self.derived_variables = {
        "episode_num": 0,
        "sweep_queue": {},
        "visited": set(),
        "steps_since_last_visit": steps_since_last_visit,
    }

    # record of the settings this agent was configured with
    self.hyperparameters = {
        "agent": "DynaAgent",
        "lr": self.lr,
        "q_plus": self.q_plus,
        "obs_max": self.obs_max,
        "obs_min": self.obs_min,
        "epsilon": self.epsilon,
        "n_tilings": self.n_tilings,
        "grid_dims": self.grid_dims,
        "explore_weight": self.explore_weight,
        "temporal_discount": self.temporal_discount,
        "n_simulated_actions": self.n_simulated_actions,
    }

    # history for the current episode starts out empty
    self.episode_history = {"state_actions": [], "rewards": []}
1479
1480 def act(self, obs):
1481 r"""

Callers 1

__init__Method · 0.95

Calls 3

tile_state_spaceFunction · 0.85
EnvModelClass · 0.85
_create_2num_dictsMethod · 0.80

Tested by

no test coverage detected