MCPcopy
hub / github.com/tensorlayer/TensorLayer / __init__

Method __init__

examples/reinforcement_learning/tutorial_PPO.py:74–109  ·  view source on GitHub ↗
(self, state_dim, action_dim, action_bound, method='clip')

Source from the content-addressed store, hash-verified

72 PPO class
73 """
74 def __init__(self, state_dim, action_dim, action_bound, method='clip'):
75 # critic
76 with tf.name_scope('critic'):
77 inputs = tl.layers.Input([None, state_dim], tf.float32, 'state')
78 layer = tl.layers.Dense(64, tf.nn.relu)(inputs)
79 layer = tl.layers.Dense(64, tf.nn.relu)(layer)
80 v = tl.layers.Dense(1)(layer)
81 self.critic = tl.models.Model(inputs, v)
82 self.critic.train()
83
84 # actor
85 with tf.name_scope('actor'):
86 inputs = tl.layers.Input([None, state_dim], tf.float32, 'state')
87 layer = tl.layers.Dense(64, tf.nn.relu)(inputs)
88 layer = tl.layers.Dense(64, tf.nn.relu)(layer)
89 a = tl.layers.Dense(action_dim, tf.nn.tanh)(layer)
90 mean = tl.layers.Lambda(lambda x: x * action_bound, name='lambda')(a)
91 logstd = tf.Variable(np.zeros(action_dim, dtype=np.float32))
92 self.actor = tl.models.Model(inputs, mean)
93 self.actor.trainable_weights.append(logstd)
94 self.actor.logstd = logstd
95 self.actor.train()
96
97 self.actor_opt = tf.optimizers.Adam(LR_A)
98 self.critic_opt = tf.optimizers.Adam(LR_C)
99
100 self.method = method
101 if method == 'penalty':
102 self.kl_target = KL_TARGET
103 self.lam = LAM
104 elif method == 'clip':
105 self.epsilon = EPSILON
106
107 self.state_buffer, self.action_buffer = [], []
108 self.reward_buffer, self.cumulative_reward_buffer = [], []
109 self.action_bound = action_bound
110
111 def train_actor(self, state, action, adv, old_pi):
112 """

Callers

nothing calls this directly

Calls 1

trainMethod · 0.45

Tested by

no test coverage detected