MCPcopy Index your code
hub / github.com/tensorlayer/TensorLayer / SoftQNetwork

Class SoftQNetwork

examples/reinforcement_learning/tutorial_SAC.py:118–137  ·  view source on GitHub ↗

The network for evaluating the values of state-action pairs: Q(s, a).

Source from the content-addressed store, hash-verified

116
117
class SoftQNetwork(Model):
    """Soft Q-network that scores state-action pairs: Q(s, a).

    The model stacks three Dense layers: two ReLU hidden layers of width
    ``hidden_dim`` followed by a single-unit output layer for which no
    activation is specified.

    Args:
        num_inputs: Added to ``num_actions`` to size the first layer's input
            (presumably the state dimension -- confirm against the caller).
        num_actions: Presumably the action dimension -- confirm against the
            caller.
        hidden_dim: Number of units in each of the two hidden layers.
        init_w: Currently unused. It was the bound of the uniform
            initializer that the Glorot initializer replaced; kept so the
            constructor signature stays stable.
    """

    def __init__(self, num_inputs, num_actions, hidden_dim, init_w=3e-3):
        super().__init__()

        # Glorot-normal initialization replaces the earlier
        # tf.random_uniform_initializer(-init_w, init_w); the author found it
        # works better than uniform in practice.
        glorot = tf.keras.initializers.glorot_normal(seed=None)

        # The first layer consumes num_inputs + num_actions features.
        joint_dim = num_inputs + num_actions

        self.linear1 = Dense(
            n_units=hidden_dim,
            act=tf.nn.relu,
            W_init=glorot,
            in_channels=joint_dim,
            name='q1',
        )
        self.linear2 = Dense(
            n_units=hidden_dim,
            act=tf.nn.relu,
            W_init=glorot,
            in_channels=hidden_dim,
            name='q2',
        )
        self.linear3 = Dense(
            n_units=1,
            W_init=glorot,
            in_channels=hidden_dim,
            name='q3',
        )

    def forward(self, input):
        """Pass ``input`` through the three layers in order and return the result."""
        out = input
        for layer in (self.linear1, self.linear2, self.linear3):
            out = layer(out)
        return out
138
139
140class PolicyNetwork(Model):

Callers 1

__init__Method · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…