hub / github.com/google-deepmind/acme / __init__

Method init

acme/agents/tf/d4pg/agent.py:252–365 · view source on GitHub ↗

Initialize the agent. Args: environment_spec: description of the actions, observations, etc. policy_network: the online (optimized) policy. critic_network: the online critic. observation_network: optional network to transform the observations before they are fed

(
      self,
      environment_spec: specs.EnvironmentSpec,
      policy_network: snt.Module,
      critic_network: snt.Module,
      observation_network: types.TensorTransformation = tf.identity,
      discount: float = 0.99,
      batch_size: int = 256,
      prefetch_size: int = 4,
      target_update_period: int = 100,
      policy_optimizer: Optional[snt.Optimizer] = None,
      critic_optimizer: Optional[snt.Optimizer] = None,
      min_replay_size: int = 1000,
      max_replay_size: int = 1000000,
      samples_per_insert: float = 32.0,
      n_step: int = 5,
      sigma: float = 0.3,
      clipping: bool = True,
      replay_table_name: str = reverb_adders.DEFAULT_PRIORITY_TABLE,
      counter: Optional[counting.Counter] = None,
      logger: Optional[loggers.Logger] = None,
      checkpoint: bool = True,
  )

Source from the content-addressed store, hash-verified

250	"""
251
252	def __init__(
253	self,
254	environment_spec: specs.EnvironmentSpec,
255	policy_network: snt.Module,
256	critic_network: snt.Module,
257	observation_network: types.TensorTransformation = tf.identity,
258	discount: float = 0.99,
259	batch_size: int = 256,
260	prefetch_size: int = 4,
261	target_update_period: int = 100,
262	policy_optimizer: Optional[snt.Optimizer] = None,
263	critic_optimizer: Optional[snt.Optimizer] = None,
264	min_replay_size: int = 1000,
265	max_replay_size: int = 1000000,
266	samples_per_insert: float = 32.0,
267	n_step: int = 5,
268	sigma: float = 0.3,
269	clipping: bool = True,
270	replay_table_name: str = reverb_adders.DEFAULT_PRIORITY_TABLE,
271	counter: Optional[counting.Counter] = None,
272	logger: Optional[loggers.Logger] = None,
273	checkpoint: bool = True,
274	):
275	"""Initialize the agent.
276
277	Args:
278	environment_spec: description of the actions, observations, etc.
279	policy_network: the online (optimized) policy.
280	critic_network: the online critic.
281	observation_network: optional network to transform the observations before
282	they are fed into any network.
283	discount: discount to use for TD updates.
284	batch_size: batch size for updates.
285	prefetch_size: size to prefetch from replay.
286	target_update_period: number of learner steps to perform before updating
287	the target networks.
288	policy_optimizer: optimizer for the policy network updates.
289	critic_optimizer: optimizer for the critic network updates.
290	min_replay_size: minimum replay size before updating.
291	max_replay_size: maximum replay size.
292	samples_per_insert: number of samples to take from replay for every insert
293	that is made.
294	n_step: number of steps to squash into a single transition.
295	sigma: standard deviation of zero-mean, Gaussian exploration noise.
296	clipping: whether to clip gradients by global norm.
297	replay_table_name: string indicating what name to give the replay table.
298	counter: counter object used to keep track of steps.
299	logger: logger object to be used by learner.
300	checkpoint: boolean indicating whether to checkpoint the learner.
301	"""
302	# Create the Builder object which will internally create agent components.
303	builder = D4PGBuilder(
304	# TODO(mwhoffman): pass the config dataclass in directly.
305	# TODO(mwhoffman): use the limiter rather than the workaround below.
306	# Right now this modifies min_replay_size and samples_per_insert so that
307	# they are not controlled by a limiter and are instead handled by the
308	# Agent base class (the above TODO directly references this behavior).
309	D4PGConfig(

Callers

nothing calls this directly

Calls 10

initMethod · 0.95

make_policyMethod · 0.95

make_replay_tablesMethod · 0.95

make_adderMethod · 0.95

make_actorMethod · 0.95

make_dataset_iteratorMethod · 0.95

make_learnerMethod · 0.95

D4PGBuilderClass · 0.70

D4PGConfigClass · 0.70

D4PGNetworksClass · 0.70

Tested by

no test coverage detected

Method __init__

Source from the content-addressed store, hash-verified

Callers

Calls 10

Tested by

Method init