The network for generating non-deterministic (Gaussian-distributed) actions from the state input.
| 148 | |
| 149 | |
class PolicyNetwork(Model):
    """Policy network that maps a state to a tanh-bounded action.

    ``forward`` is deterministic and returns values in [-1, 1]. Gaussian noise
    is only added afterwards: clipped noise in ``evaluate`` (target policy
    smoothing) and unclipped exploration noise in ``get_action``.
    """

    def __init__(self, num_inputs, num_actions, hidden_dim, action_range=1., init_w=3e-3):
        """Build three ReLU hidden layers followed by a linear output layer.

        Args:
            num_inputs: input width of the first layer (size of the state).
            num_actions: number of units of the output layer.
            hidden_dim: number of units of each hidden layer.
            action_range: factor the tanh output is multiplied by in
                ``evaluate``, ``get_action`` and ``sample_action``.
            init_w: weights (and the output bias) are initialised uniformly
                in ``[-init_w, init_w]``.
        """
        super(PolicyNetwork, self).__init__()
        w_init = tf.random_uniform_initializer(-init_w, init_w)

        self.linear1 = Dense(n_units=hidden_dim, act=tf.nn.relu, W_init=w_init, in_channels=num_inputs, name='policy1')
        self.linear2 = Dense(n_units=hidden_dim, act=tf.nn.relu, W_init=w_init, in_channels=hidden_dim, name='policy2')
        self.linear3 = Dense(n_units=hidden_dim, act=tf.nn.relu, W_init=w_init, in_channels=hidden_dim, name='policy3')
        self.output_linear = Dense(
            n_units=num_actions, W_init=w_init, b_init=tf.random_uniform_initializer(-init_w, init_w),
            in_channels=hidden_dim, name='policy_output'
        )
        self.action_range = action_range
        self.num_actions = num_actions

    def forward(self, state):
        """Return the un-scaled action for ``state``; tanh keeps it in [-1, 1]."""
        x = self.linear1(state)
        x = self.linear2(x)
        x = self.linear3(x)
        output = tf.nn.tanh(self.output_linear(x))  # unit range output [-1, 1]
        return output

    def evaluate(self, state, eval_noise_scale):
        """Generate a noisy action from ``state`` for calculating gradients.

        Args:
            state: array exposing ``astype`` (it is cast to float32 before the
                forward pass).
            eval_noise_scale: scale of the smoothing noise (target policy
                smoothing trick); the noise is clipped to twice this value.

        Returns:
            The scaled action plus clipped noise. The sum is not clipped back
            to ``action_range``.
        """
        state = state.astype(np.float32)
        action = self.action_range * self.forward(state)

        # N(0, 1) noise scaled by eval_noise_scale, then clipped to
        # +/- 2 * eval_noise_scale before it is added to the action.
        normal = Normal(0, 1)
        eval_noise_clip = 2 * eval_noise_scale
        noise = normal.sample(action.shape) * eval_noise_scale
        noise = tf.clip_by_value(noise, -eval_noise_clip, eval_noise_clip)
        return action + noise

    def get_action(self, state, explore_noise_scale, greedy=False):
        """Generate an action from a single ``state`` for interaction with the environment.

        Args:
            state: one un-batched state; it is wrapped in a list to form a
                batch of one for the forward pass.
            explore_noise_scale: scale applied to the N(0, 1) exploration noise.
            greedy: if True, return the scaled action without any noise.

        Returns:
            A NumPy array holding the action for this state.
        """
        action = self.forward([state])
        action = self.action_range * action.numpy()[0]
        if greedy:
            return action

        # Convert the noise to NumPy before adding it. The previous
        # `action += noise; action.numpy()` only worked when NumPy handed the
        # in-place add over to the tensor; otherwise `action` stayed an
        # ndarray and `.numpy()` did not exist.
        # NOTE(review): assumes Normal.sample returns something np.asarray can
        # convert (e.g. an eager tensor) - confirm against the Normal in use.
        normal = Normal(0, 1)
        noise = np.asarray(normal.sample(action.shape)) * explore_noise_scale
        return action + noise

    def sample_action(self):
        """Generate a random action for exploration.

        Draws ``num_actions`` values uniformly between -1 and 1 and scales
        them by ``action_range``; the network itself is not used.
        """
        a = tf.random.uniform([self.num_actions], -1, 1)
        return self.action_range * a.numpy()
no outgoing calls
no test coverage detected
searching dependent graphs…