hub / github.com/tensorlayer/TensorLayer / PolicyNetwork

Class PolicyNetwork

examples/reinforcement_learning/tutorial_TD3.py:150–207 · view source on GitHub ↗

The network for generating non-deterministic (Gaussian distributed) actions from the state input.

Source from the content-addressed store, hash-verified

148
149
class PolicyNetwork(Model):
    """Policy network mapping a state to an action squashed by tanh.

    ``forward`` produces the tanh output in [-1, 1]; ``evaluate`` and
    ``get_action`` scale it by ``action_range`` and add Gaussian noise
    (non-deterministic action), while ``sample_action`` draws a uniformly
    random action without using the network.
    """

    def __init__(self, num_inputs, num_actions, hidden_dim, action_range=1., init_w=3e-3):
        """Build three ReLU hidden layers followed by a linear output layer.

        Args:
            num_inputs: number of input channels of the first layer (state size).
            num_actions: number of units of the output layer (action size).
            hidden_dim: number of units in each of the three hidden layers.
            action_range: factor the tanh output is multiplied by.
            init_w: weights of every layer, and the bias of the output layer,
                are initialised uniformly in [-init_w, init_w].
        """
        super(PolicyNetwork, self).__init__()
        w_init = tf.random_uniform_initializer(-init_w, init_w)

        self.linear1 = Dense(n_units=hidden_dim, act=tf.nn.relu, W_init=w_init, in_channels=num_inputs, name='policy1')
        self.linear2 = Dense(n_units=hidden_dim, act=tf.nn.relu, W_init=w_init, in_channels=hidden_dim, name='policy2')
        self.linear3 = Dense(n_units=hidden_dim, act=tf.nn.relu, W_init=w_init, in_channels=hidden_dim, name='policy3')
        self.output_linear = Dense(
            n_units=num_actions, W_init=w_init, b_init=tf.random_uniform_initializer(-init_w, init_w),
            in_channels=hidden_dim, name='policy_output'
        )
        self.action_range = action_range
        self.num_actions = num_actions

    def forward(self, state):
        """Run the network on ``state`` and return the tanh of the output layer."""
        x = self.linear1(state)
        x = self.linear2(x)
        x = self.linear3(x)
        output = tf.nn.tanh(self.output_linear(x))  # unit range output [-1, 1]
        return output

    def evaluate(self, state, eval_noise_scale):
        """Generate noisy actions from ``state`` for calculating gradients.

        Args:
            state: object providing ``astype``; it is cast to float32 before
                being fed to the network.
            eval_noise_scale: scale of the noise added to the action, as the
                trick of target policy smoothing.

        Returns:
            The network output scaled by ``action_range`` plus the clipped
            noise. The sum itself is not clipped.
        """
        state = state.astype(np.float32)
        action = self.forward(state)

        action = self.action_range * action

        # add noise, clipped to two noise scales on either side
        normal = Normal(0, 1)
        eval_noise_clip = 2 * eval_noise_scale
        noise = normal.sample(action.shape) * eval_noise_scale
        noise = tf.clip_by_value(noise, -eval_noise_clip, eval_noise_clip)
        action = action + noise
        return action

    def get_action(self, state, explore_noise_scale, greedy=False):
        """Generate an action from one state for interaction with the environment.

        Args:
            state: a single state; it is wrapped in a list to form a batch of
                one before the forward pass.
            explore_noise_scale: multiplier applied to the N(0, 1) exploration
                noise.
            greedy: when True, return the scaled network output without noise.

        Returns:
            The first (only) row of the network output scaled by
            ``action_range``, as a NumPy array, with exploration noise added
            unless ``greedy`` is set.
        """
        action = self.forward([state])
        action = self.action_range * action.numpy()[0]
        if greedy:
            return action
        # add noise
        normal = Normal(0, 1)
        noise = normal.sample(action.shape) * explore_noise_scale
        # `action` is already a NumPy array here. Convert the sampled noise
        # explicitly and add in NumPy, rather than relying on `ndarray += tensor`
        # silently rebinding `action` to a tensor that is then converted back.
        return action + np.asarray(noise)

    def sample_action(self):
        """Generate a random action for exploration.

        Returns:
            ``num_actions`` values drawn uniformly from [-1, 1) and scaled by
            ``action_range``.
        """
        a = tf.random.uniform([self.num_actions], -1, 1)
        return self.action_range * a.numpy()

Callers 1

__init__Method · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…