MCPcopy Index your code
hub / github.com/tensorlayer/TensorLayer / PolicyNetwork

Class PolicyNetwork

examples/reinforcement_learning/tutorial_TD3.py:150–207  ·  view source on GitHub ↗

The network for generating non-deterministic (Gaussian-distributed) actions from the state input.

Source from the content-addressed store, hash-verified

148
149
class PolicyNetwork(Model):
    """Policy network: three ReLU hidden layers followed by a tanh output layer.

    ``forward`` produces actions in [-1, 1]. ``evaluate`` and ``get_action``
    scale that output by ``action_range`` and can add noise drawn from
    ``Normal(0, 1)``; ``sample_action`` draws a uniformly random action.
    """

    def __init__(self, num_inputs, num_actions, hidden_dim, action_range=1., init_w=3e-3):
        """Build the layers.

        num_inputs: size of the state vector fed to the first layer.
        num_actions: number of units in the output layer.
        hidden_dim: number of units in each of the three hidden layers.
        action_range: factor the tanh output is multiplied by.
        init_w: half-width of the uniform range used to initialise weights.
        """
        super().__init__()
        w_init = tf.random_uniform_initializer(-init_w, init_w)
        # The output bias gets its own initializer instance over the same range.
        b_init = tf.random_uniform_initializer(-init_w, init_w)

        # Settings shared by every hidden layer; only the input width and the
        # layer name differ between them.
        hidden_kwargs = dict(n_units=hidden_dim, act=tf.nn.relu, W_init=w_init)

        self.linear1 = Dense(in_channels=num_inputs, name='policy1', **hidden_kwargs)
        self.linear2 = Dense(in_channels=hidden_dim, name='policy2', **hidden_kwargs)
        self.linear3 = Dense(in_channels=hidden_dim, name='policy3', **hidden_kwargs)
        self.output_linear = Dense(
            n_units=num_actions,
            W_init=w_init,
            b_init=b_init,
            in_channels=hidden_dim,
            name='policy_output',
        )
        self.action_range = action_range
        self.num_actions = num_actions

    def forward(self, state):
        """Run ``state`` through the hidden layers and return the tanh output in [-1, 1]."""
        hidden = state
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = layer(hidden)
        return tf.nn.tanh(self.output_linear(hidden))

    def evaluate(self, state, eval_noise_scale):
        """Return a noisy, range-scaled action for ``state`` (used when computing gradients).

        eval_noise_scale: multiplier applied to the standard-normal noise; the
        noise is then clipped to +/- twice this value (target policy smoothing)
        before being added to the action.
        """
        scaled_action = self.action_range * self.forward(state.astype(np.float32))

        noise_limit = 2 * eval_noise_scale
        raw_noise = Normal(0, 1).sample(scaled_action.shape) * eval_noise_scale
        return scaled_action + tf.clip_by_value(raw_noise, -noise_limit, noise_limit)

    def get_action(self, state, explore_noise_scale, greedy=False):
        """Return an action for a single ``state`` when interacting with the environment.

        explore_noise_scale: multiplier applied to the standard-normal exploration noise.
        greedy: when true, skip the noise and return the scaled network output directly.
        """
        # Wrap the state in a list to form a batch of one, then unwrap the result.
        unit_action = self.forward([state]).numpy()[0]
        scaled = self.action_range * unit_action
        if greedy:
            return scaled

        noise = Normal(0, 1).sample(scaled.shape) * explore_noise_scale
        # NOTE(review): the `.numpy()` call below implies this sum ends up as a
        # tensor rather than an ndarray - confirm before changing this statement.
        scaled += noise
        return scaled.numpy()

    def sample_action(self):
        """Return a random action drawn uniformly from [-1, 1) and scaled by ``action_range``."""
        unit_sample = tf.random.uniform([self.num_actions], -1, 1)
        return self.action_range * unit_sample.numpy()

Callers 1

__init__Method · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…