DDPG class
| 67 | |
| 68 | |
| 69 | class DDPG(object): |
| 70 | """ |
| 71 | DDPG class |
| 72 | """ |
| 73 | |
| 74 | def __init__(self, action_dim, state_dim, action_range): |
| 75 | self.memory = np.zeros((MEMORY_CAPACITY, state_dim * 2 + action_dim + 1), dtype=np.float32) |
| 76 | self.pointer = 0 |
| 77 | self.action_dim, self.state_dim, self.action_range = action_dim, state_dim, action_range |
| 78 | self.var = VAR |
| 79 | |
| 80 | W_init = tf.random_normal_initializer(mean=0, stddev=0.3) |
| 81 | b_init = tf.constant_initializer(0.1) |
| 82 | |
def get_actor(input_state_shape, name=''):
    """
    Assemble the actor (policy) network as a TensorLayer model.

    The network is: input -> two 64-unit ReLU Dense layers -> a tanh Dense
    layer with ``action_dim`` units -> a Lambda layer that multiplies the
    tanh output by ``action_range``. ``action_dim``, ``action_range``,
    ``W_init`` and ``b_init`` are read from the enclosing scope.

    :param input_state_shape: shape passed to the input layer
    :param name: suffix appended to the model name ``'Actor'``
    :return: ``tl.models.Model`` mapping the state input to the scaled action
    """
    # Every Dense layer shares the same weight / bias initializers.
    init_kwargs = dict(W_init=W_init, b_init=b_init)

    state_in = tl.layers.Input(input_state_shape, name='A_input')

    # Two identical hidden layers, created in order A_l1 then A_l2.
    hidden = state_in
    for layer_name in ('A_l1', 'A_l2'):
        hidden = tl.layers.Dense(n_units=64, act=tf.nn.relu, name=layer_name, **init_kwargs)(hidden)

    # tanh output, then multiplied by action_range.
    squashed = tl.layers.Dense(n_units=action_dim, act=tf.nn.tanh, name='A_a', **init_kwargs)(hidden)
    scaled = tl.layers.Lambda(lambda x: action_range * x)(squashed)

    return tl.models.Model(inputs=state_in, outputs=scaled, name='Actor' + name)
| 96 | |
def get_critic(input_state_shape, input_action_shape, name=''):
    """
    Assemble the critic (Q-value) network as a TensorLayer model.

    The state and action inputs are concatenated along axis 1, passed
    through two 64-unit ReLU Dense layers, and reduced to a single unit
    by a final Dense layer with no activation argument. ``W_init`` and
    ``b_init`` are read from the enclosing scope.

    :param input_state_shape: shape passed to the state input layer
    :param input_action_shape: shape passed to the action input layer
    :param name: suffix appended to the model name ``'Critic'``
    :return: ``tl.models.Model`` taking ``[state, action]`` and producing Q(s, a)
    """
    # Every Dense layer shares the same weight / bias initializers.
    init_kwargs = dict(W_init=W_init, b_init=b_init)

    s_in = tl.layers.Input(input_state_shape, name='C_s_input')
    a_in = tl.layers.Input(input_action_shape, name='C_a_input')

    # Join state and action features before the hidden layers.
    hidden = tl.layers.Concat(1)([s_in, a_in])
    for layer_name in ('C_l1', 'C_l2'):
        hidden = tl.layers.Dense(n_units=64, act=tf.nn.relu, name=layer_name, **init_kwargs)(hidden)

    q_out = tl.layers.Dense(n_units=1, name='C_out', **init_kwargs)(hidden)

    return tl.models.Model(inputs=[s_in, a_in], outputs=q_out, name='Critic' + name)
| 112 | |
| 113 | self.actor = get_actor([None, state_dim]) |
| 114 | self.critic = get_critic([None, state_dim], [None, action_dim]) |
| 115 | self.actor.train() |
| 116 | self.critic.train() |
| 117 | |
def copy_para(from_model, to_model):
    """
    Overwrite the trainable weights of ``to_model`` with those of ``from_model``.

    Weights are paired by position in each model's ``trainable_weights``
    list, and each target weight's ``assign`` is called with the matching
    source weight. This is a plain overwrite; no blending factor is applied
    here. Pairing stops at the shorter of the two lists.

    :param from_model: model whose weights are read
    :param to_model: model whose weights are overwritten in place
    :return: None
    """
    weight_pairs = zip(from_model.trainable_weights, to_model.trainable_weights)
    for source_w, target_w in weight_pairs:
        target_w.assign(source_w)
no outgoing calls
no test coverage detected
searching dependent graphs…