* Get policy-network logits and the action based on state-tensor inputs. * * @param {tf.Tensor} inputs A tf.Tensor instance of shape `[batchSize, 4]`. * @returns {[tf.Tensor, tf.Tensor]} * 1. The logits tensor, of shape `[batchSize, 1]`. * 2. The actions tensor, of shape `[batchSize, 1]`.
(inputs)
| 202 | * 2. The actions tensor, of shape `[batchSize, 1]`. |
| 203 | */ |
| 204 | getLogitsAndActions(inputs) { |
| 205 | return tf.tidy(() => { |
| 206 | const logits = this.policyNet.predict(inputs); |
| 207 | |
| 208 | // Get the probability of the leftward action. |
| 209 | const leftProb = tf.sigmoid(logits); |
| 210 | // Probabilities of the left and right actions. |
| 211 | const leftRightProbs = tf.concat([leftProb, tf.sub(1, leftProb)], 1); |
| 212 | const actions = tf.multinomial(leftRightProbs, 1, null, true); |
| 213 | return [logits, actions]; |
| 214 | }); |
| 215 | } |
| 216 | |
| 217 | /** |
| 218 | * Get actions based on a state-tensor input. |
no test coverage detected