Method `__init__`

examples/reinforcement_learning/tutorial_PPO.py:74–109 · view source on GitHub ↗

(self, state_dim, action_dim, action_bound, method='clip')

Source from the content-addressed store, hash-verified

72	PPO class
73	"""
def __init__(self, state_dim, action_dim, action_bound, method='clip'):
    """
    Build the critic and actor networks, their optimizers and the empty rollout buffers.

    :param state_dim: width of the state input fed to both networks
    :param action_dim: number of actor output units; also the length of the log-std vector
    :param action_bound: factor applied to the actor's tanh output to obtain the action mean
    :param method: 'clip' or 'penalty'; selects which hyperparameters are stored on the instance
    :raises ValueError: if method is neither 'penalty' nor 'clip'
    """
    # Fail fast, before any network is built: any other value would leave the
    # instance with neither the penalty (kl_target, lam) nor the clip (epsilon)
    # hyperparameters set.
    if method not in ('penalty', 'clip'):
        raise ValueError("method must be 'penalty' or 'clip', got %r" % (method,))

    # critic: state -> 64 -> 64 -> 1 (single linear output)
    with tf.name_scope('critic'):
        inputs = tl.layers.Input([None, state_dim], tf.float32, 'state')
        layer = tl.layers.Dense(64, tf.nn.relu)(inputs)
        layer = tl.layers.Dense(64, tf.nn.relu)(layer)
        v = tl.layers.Dense(1)(layer)
    self.critic = tl.models.Model(inputs, v)
    self.critic.train()

    # actor: state -> 64 -> 64 -> tanh(action_dim), then scaled by action_bound
    with tf.name_scope('actor'):
        inputs = tl.layers.Input([None, state_dim], tf.float32, 'state')
        layer = tl.layers.Dense(64, tf.nn.relu)(inputs)
        layer = tl.layers.Dense(64, tf.nn.relu)(layer)
        a = tl.layers.Dense(action_dim, tf.nn.tanh)(layer)
        mean = tl.layers.Lambda(lambda x: x * action_bound, name='lambda')(a)
        # State-independent log-std, initialised to zeros (one entry per action dim).
        logstd = tf.Variable(np.zeros(action_dim, dtype=np.float32))
    self.actor = tl.models.Model(inputs, mean)
    # logstd is a free variable that no layer owns, so it is added to the
    # model's weight list by hand and also exposed as an attribute.
    # NOTE(review): presumably so code that iterates actor.trainable_weights
    # also updates logstd -- confirm against the training step.
    self.actor.trainable_weights.append(logstd)
    self.actor.logstd = logstd
    self.actor.train()

    # Separate Adam optimizers, each with its own learning rate.
    self.actor_opt = tf.optimizers.Adam(LR_A)
    self.critic_opt = tf.optimizers.Adam(LR_C)

    # Only the hyperparameters of the selected method are stored.
    self.method = method
    if method == 'penalty':
        self.kl_target = KL_TARGET
        self.lam = LAM
    elif method == 'clip':
        self.epsilon = EPSILON

    # Buffers start empty.
    self.state_buffer, self.action_buffer = [], []
    self.reward_buffer, self.cumulative_reward_buffer = [], []
    self.action_bound = action_bound
110
111	def train_actor(self, state, action, adv, old_pi):
112	"""

nothing calls this directly

trainMethod · 0.45

no test coverage detected