hub / github.com/lazyprogrammer/machine_learning_examples / __init__

Method `__init__`

rl2/a3c/nets.py:39–81 · view source on GitHub ↗

(self, num_outputs, reg=0.01)

Source from the content-addressed store, hash-verified

37
class PolicyNetwork:
    """Policy head of the actor-critic model, built as a TensorFlow 1.x graph.

    Constructing an instance adds placeholders, the policy output layer,
    the loss and the gradient ops to the current default graph.

    Attributes created by ``__init__``:
        states: uint8 placeholder of shape [None, 84, 84, 4] (name "X").
        advantage: float32 placeholder of shape [None] (name "y").
        actions: int32 placeholder of shape [None] (name "actions").
        logits: un-activated output of the final fully connected layer.
        probs: softmax of ``logits``.
        sample_action: op sampling an action from Categorical(logits).
        entropy: per-row entropy of ``probs``.
        selected_action_probs: probability of the action fed in ``actions``.
        loss: scalar loss (negated sum over the batch).
        optimizer: RMSProp optimizer instance.
        grads_and_vars: [grad, var] pairs, excluding ``None`` gradients.
    """

    def __init__(self, num_outputs, reg=0.01):
        """Build the policy graph.

        Args:
            num_outputs: number of units in the output layer (one logit
                per action).
            reg: weight of the entropy bonus added to the objective.
        """
        self.num_outputs = num_outputs

        # Graph inputs
        # After resizing we have 4 consecutive frames of size 84 x 84
        self.states = tf.placeholder(
            shape=[None, 84, 84, 4], dtype=tf.uint8, name="X"
        )
        # Advantage = G - V(s)
        self.advantage = tf.placeholder(
            shape=[None], dtype=tf.float32, name="y"
        )
        # Selected actions
        self.actions = tf.placeholder(
            shape=[None], dtype=tf.int32, name="actions"
        )

        # Since we set reuse=False here, that means we MUST
        # create the PolicyNetwork before creating the ValueNetwork
        # ValueNetwork will use reuse=True
        with tf.variable_scope("shared", reuse=False):
            fc1 = build_feature_extractor(self.states)

        # Use a separate scope for output and loss
        with tf.variable_scope("policy_network"):
            self.logits = tf.contrib.layers.fully_connected(
                fc1, num_outputs, activation_fn=None
            )
            self.probs = tf.nn.softmax(self.logits)

            # Log-probabilities come straight from the logits rather than
            # from tf.log(self.probs): if a probability underflows to 0,
            # tf.log yields -inf and 0 * -inf poisons the entropy, the loss
            # and every gradient with NaN. log_softmax stays finite.
            log_probs = tf.nn.log_softmax(self.logits)

            # Sample an action
            cdist = tf.distributions.Categorical(logits=self.logits)
            self.sample_action = cdist.sample()

            # Add regularization to increase exploration
            self.entropy = -tf.reduce_sum(self.probs * log_probs, axis=1)

            # Get the predictions for the chosen actions only: flatten the
            # [batch, num_actions] matrix and index row i at
            # i * num_actions + actions[i].
            batch_size = tf.shape(self.states)[0]
            gather_indices = (
                tf.range(batch_size) * tf.shape(self.probs)[1] + self.actions
            )
            self.selected_action_probs = tf.gather(
                tf.reshape(self.probs, [-1]), gather_indices
            )
            selected_log_probs = tf.gather(
                tf.reshape(log_probs, [-1]), gather_indices
            )

            # Objective to maximise: log pi(a|s) * advantage + reg * entropy.
            # It is negated below because the optimizer minimises.
            self.loss = selected_log_probs * self.advantage + reg * self.entropy
            self.loss = -tf.reduce_sum(self.loss, name="loss")

            # training
            self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)

            # we'll need these later for running gradient descent steps
            self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
            # Drop variables the loss does not depend on (gradient is None).
            self.grads_and_vars = [
                [grad, var]
                for grad, var in self.grads_and_vars
                if grad is not None
            ]
82
83
84	class ValueNetwork:

Callers

nothing calls this directly

Calls 2

build_feature_extractor — Function · 0.85

sample — Method · 0.45

Tested by

no test coverage detected

Method __init__

Source from the content-addressed store, hash-verified

Callers

Calls 2

Tested by

Method `__init__`