| 131 | """ The network for evaluating the values of state-action pairs: Q(s,a) """ |
| 132 | |
def __init__(self, num_inputs, num_actions, hidden_dim, init_w=3e-3):
    """Create the three Dense layers of the network.

    :param num_inputs: summed with ``num_actions`` to give the first layer's input width
    :param num_actions: summed with ``num_inputs`` to give the first layer's input width
    :param hidden_dim: number of units in each of the two ReLU layers
    :param init_w: bound of the uniform weight initializer, used as (-init_w, init_w)
    """
    super(QNetwork, self).__init__()

    # A single uniform initializer is passed as W_init to every layer below.
    uniform_init = tf.random_uniform_initializer(-init_w, init_w)
    # The first layer takes num_inputs + num_actions input channels.
    joint_dim = num_inputs + num_actions

    self.linear1 = Dense(
        n_units=hidden_dim,
        act=tf.nn.relu,
        W_init=uniform_init,
        in_channels=joint_dim,
        name='q1',
    )
    self.linear2 = Dense(
        n_units=hidden_dim,
        act=tf.nn.relu,
        W_init=uniform_init,
        in_channels=hidden_dim,
        name='q2',
    )
    # Output layer: one unit, no activation argument given.
    self.linear3 = Dense(
        n_units=1,
        W_init=uniform_init,
        in_channels=hidden_dim,
        name='q3',
    )
| 142 | |
| 143 | def forward(self, input): |
| 144 | x = self.linear1(input) |