MCPcopy Index your code
hub / github.com/lazyprogrammer/machine_learning_examples / __init__

Method __init__

rl2/mountaincar/pg_tf.py:51–91  ·  view source on GitHub ↗
(self, D, ft, hidden_layer_sizes=[])

Source from the content-addressed store, hash-verified

49# approximates pi(a | s)
50class PolicyModel:
def __init__(self, D, ft, hidden_layer_sizes=()):
    """Build the TensorFlow graph for a Gaussian policy pi(a | s).

    Args:
        D: Number of input features; sets the width of the ``X``
            placeholder and the input size of the first layer.
        ft: Stored unchanged as ``self.ft``; it is not used while the
            graph is built (presumably a feature transformer used by
            other methods -- confirm against callers).
        hidden_layer_sizes: Iterable of hidden-layer output sizes, in
            order. Defaults to an empty tuple (no hidden layers); an
            immutable default avoids sharing one list across calls.

    Creates the ``X``, ``actions`` and ``advantages`` placeholders plus
    ``predict_op`` and ``train_op`` as attributes on the instance.

    NOTE: uses TensorFlow 1.x graph-mode APIs (``tf.placeholder``,
    ``tf.contrib``, ``tf.train.AdamOptimizer``).
    """
    self.ft = ft

    ##### hidden layers #####
    # M1 tracks the input size of the next layer to be created.
    M1 = D
    self.hidden_layers = []
    for M2 in hidden_layer_sizes:
        layer = HiddenLayer(M1, M2)
        self.hidden_layers.append(layer)
        M1 = M2

    # final layer mean: single linear (identity-activation) output
    self.mean_layer = HiddenLayer(M1, 1, lambda x: x, use_bias=False, zeros=True)

    # final layer standard deviation: softplus keeps the output positive
    self.stdv_layer = HiddenLayer(M1, 1, tf.nn.softplus, use_bias=False, zeros=False)

    # inputs and targets
    self.X = tf.placeholder(tf.float32, shape=(None, D), name='X')
    self.actions = tf.placeholder(tf.float32, shape=(None,), name='actions')
    self.advantages = tf.placeholder(tf.float32, shape=(None,), name='advantages')

    # get final hidden layer
    Z = self.X
    for layer in self.hidden_layers:
        Z = layer.forward(Z)

    # calculate output and cost
    mean = self.mean_layer.forward(Z)
    stdv = self.stdv_layer.forward(Z) + 1e-5  # smoothing: keep stdv strictly > 0

    # make them 1-D so they line up with the 1-D actions/advantages
    mean = tf.reshape(mean, [-1])
    stdv = tf.reshape(stdv, [-1])

    norm = tf.contrib.distributions.Normal(mean, stdv)
    # Sampled action, clipped to the range [-1, 1].
    self.predict_op = tf.clip_by_value(norm.sample(), -1, 1)

    log_probs = norm.log_prob(self.actions)
    # Minimise the negative of (advantage-weighted log-probability plus a
    # 0.1-weighted entropy term), summed over the batch.
    cost = -tf.reduce_sum(self.advantages * log_probs + 0.1*norm.entropy())
    self.train_op = tf.train.AdamOptimizer(1e-3).minimize(cost)
92
def set_session(self, session):
    """Store ``session`` on the instance as ``self.session``."""
    self.session = session

Callers

nothing calls this directly

Calls 4

forward (Method) · 0.95
entropy (Method) · 0.80
HiddenLayer (Class) · 0.70
sample (Method) · 0.45

Tested by

no test coverage detected