MCPcopy
hub / github.com/tensorlayer/TensorLayer / DDPG

Class DDPG

examples/reinforcement_learning/tutorial_DDPG.py:69–233  ·  view source on GitHub ↗

DDPG class

Source from the content-addressed store, hash-verified

67
68
69class DDPG(object):
70 """
71 DDPG class
72 """
73
74 def __init__(self, action_dim, state_dim, action_range):
75 self.memory = np.zeros((MEMORY_CAPACITY, state_dim * 2 + action_dim + 1), dtype=np.float32)
76 self.pointer = 0
77 self.action_dim, self.state_dim, self.action_range = action_dim, state_dim, action_range
78 self.var = VAR
79
80 W_init = tf.random_normal_initializer(mean=0, stddev=0.3)
81 b_init = tf.constant_initializer(0.1)
82
def get_actor(input_state_shape, name=''):
    """
    Build the actor network
    :param input_state_shape: shape passed to the state input layer
    :param name: suffix appended to the model name 'Actor'
    :return: model whose output is the tanh action multiplied by action_range
    """
    state_in = tl.layers.Input(input_state_shape, name='A_input')

    # Two hidden layers with identical settings; only the layer name differs.
    hidden = state_in
    for layer_name in ('A_l1', 'A_l2'):
        hidden = tl.layers.Dense(
            n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=layer_name
        )(hidden)

    # tanh keeps the raw action in [-1, 1]; the Lambda layer then multiplies it
    # by action_range (taken from the enclosing scope).
    raw_action = tl.layers.Dense(
        n_units=action_dim, act=tf.nn.tanh, W_init=W_init, b_init=b_init, name='A_a'
    )(hidden)
    scaled_action = tl.layers.Lambda(lambda x: action_range * x)(raw_action)

    return tl.models.Model(inputs=state_in, outputs=scaled_action, name='Actor' + name)
96
def get_critic(input_state_shape, input_action_shape, name=''):
    """
    Build the critic network
    :param input_state_shape: shape passed to the state input layer
    :param input_action_shape: shape passed to the action input layer
    :param name: suffix appended to the model name 'Critic'
    :return: model taking [state, action] and producing a single-unit output Q(s,a)
    """
    s_in = tl.layers.Input(input_state_shape, name='C_s_input')
    a_in = tl.layers.Input(input_action_shape, name='C_a_input')

    # State and action are joined into one tensor before the dense stack.
    hidden = tl.layers.Concat(1)([s_in, a_in])

    # Two hidden layers with identical settings; only the layer name differs.
    for layer_name in ('C_l1', 'C_l2'):
        hidden = tl.layers.Dense(
            n_units=64, act=tf.nn.relu, W_init=W_init, b_init=b_init, name=layer_name
        )(hidden)

    # Output layer has one unit and no activation argument.
    q_value = tl.layers.Dense(n_units=1, W_init=W_init, b_init=b_init, name='C_out')(hidden)

    return tl.models.Model(inputs=[s_in, a_in], outputs=q_value, name='Critic' + name)
112
113 self.actor = get_actor([None, state_dim])
114 self.critic = get_critic([None, state_dim], [None, action_dim])
115 self.actor.train()
116 self.critic.train()
117
def copy_para(from_model, to_model):
    """
    Overwrite the trainable weights of one model with those of another
    :param from_model: model whose trainable weights are read
    :param to_model: model whose trainable weights are assigned in place
    :return: None
    """
    source_weights = from_model.trainable_weights
    target_weights = to_model.trainable_weights
    # Weights are paired by position; zip stops at the shorter of the two lists.
    for src, dst in zip(source_weights, target_weights):
        dst.assign(src)

Callers 1

tutorial_DDPG.py · File · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…