Class SoftQNetwork

examples/reinforcement_learning/tutorial_SAC.py:118–137 · view source on GitHub ↗

The network for evaluating the values of state-action pairs: Q(s, a)

Source from the content-addressed store, hash-verified

116
117
class SoftQNetwork(Model):
    """Network that evaluates the value of state-action pairs, Q(s, a).

    Three stacked Dense layers: two ReLU hidden layers of width
    ``hidden_dim`` followed by a single-unit output layer with no
    activation argument.
    """

    def __init__(self, num_inputs, num_actions, hidden_dim, init_w=3e-3):
        """Create the three Dense layers of the Q-network.

        Args:
            num_inputs: size of the state part of the input.
            num_actions: size of the action part of the input.
            hidden_dim: number of units in each of the two hidden layers.
            init_w: bound for the alternative uniform initializer
                (``tf.random_uniform_initializer(-init_w, init_w)``);
                not used while Glorot-normal initialization is active.
        """
        super().__init__()

        # The first layer's input width is the state and action sizes combined.
        in_features = num_inputs + num_actions

        # Glorot initialization is better than uniform in practice; one
        # initializer object is shared by all three layers.
        glorot = tf.keras.initializers.glorot_normal(seed=None)

        self.linear1 = Dense(
            n_units=hidden_dim,
            act=tf.nn.relu,
            W_init=glorot,
            in_channels=in_features,
            name='q1',
        )
        self.linear2 = Dense(
            n_units=hidden_dim,
            act=tf.nn.relu,
            W_init=glorot,
            in_channels=hidden_dim,
            name='q2',
        )
        self.linear3 = Dense(
            n_units=1,
            W_init=glorot,
            in_channels=hidden_dim,
            name='q3',
        )

    def forward(self, input):
        """Pass ``input`` through the three layers and return the result.

        Args:
            input: tensor fed to the first layer, which was built with
                ``in_channels = num_inputs + num_actions`` (presumably the
                state and action joined along the last axis -- confirm
                against the caller).

        Returns:
            Output of the final single-unit Dense layer.
        """
        return self.linear3(self.linear2(self.linear1(input)))
138
139
140	class PolicyNetwork(Model):

__init__Method · 0.85

no outgoing calls

no test coverage detected

searching dependent graphs…