| 2190 | |
| 2191 | |
| 2192 | def TFNCELoss(X, target_word, L): |
| 2193 | from tensorflow.python.ops.nn_impl import _compute_sampled_logits |
| 2194 | from tensorflow.python.ops.nn_impl import sigmoid_cross_entropy_with_logits |
| 2195 | |
| 2196 | tf.compat.v1.disable_eager_execution() |
| 2197 | |
| 2198 | in_embed = tf.compat.v1.placeholder(tf.float32, shape=X.shape) |
| 2199 | in_bias = tf.compat.v1.placeholder( |
| 2200 | tf.float32, shape=L.parameters["b"].flatten().shape |
| 2201 | ) |
| 2202 | in_weights = tf.compat.v1.placeholder(tf.float32, shape=L.parameters["W"].shape) |
| 2203 | in_target_word = tf.compat.v1.placeholder(tf.int64) |
| 2204 | in_neg_samples = tf.compat.v1.placeholder(tf.int32) |
| 2205 | in_target_prob = tf.compat.v1.placeholder(tf.float32) |
| 2206 | in_neg_samp_prob = tf.compat.v1.placeholder(tf.float32) |
| 2207 | |
| 2208 | # in_embed = tf.keras.Input(dtype=tf.float32, shape=X.shape) |
| 2209 | # in_bias = tf.keras.Input(dtype=tf.float32, shape=L.parameters["b"].flatten().shape) |
| 2210 | # in_weights = tf.keras.Input(dtype=tf.float32, shape=L.parameters["W"].shape) |
| 2211 | # in_target_word = tf.keras.Input(dtype=tf.int64, shape=()) |
| 2212 | # in_neg_samples = tf.keras.Input(dtype=tf.int32, shape=()) |
| 2213 | # in_target_prob = tf.keras.Input(dtype=tf.float32, shape=()) |
| 2214 | # in_neg_samp_prob = tf.keras.Input(dtype=tf.float32, shape=()) |
| 2215 | |
| 2216 | feed = { |
| 2217 | in_embed: X, |
| 2218 | in_weights: L.parameters["W"], |
| 2219 | in_target_word: target_word, |
| 2220 | in_bias: L.parameters["b"].flatten(), |
| 2221 | in_neg_samples: L.derived_variables["noise_samples"][0], |
| 2222 | in_target_prob: L.derived_variables["noise_samples"][1], |
| 2223 | in_neg_samp_prob: L.derived_variables["noise_samples"][2], |
| 2224 | } |
| 2225 | |
| 2226 | # Compute the unreduced NCE loss using the negative samples supplied through `sampled_values` (fed from L.derived_variables["noise_samples"]) rather than drawing new ones. |
| 2227 | nce_unreduced = tf.nn.nce_loss( |
| 2228 | weights=in_weights, |
| 2229 | biases=in_bias, |
| 2230 | labels=in_target_word, |
| 2231 | inputs=in_embed, |
| 2232 | sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob), |
| 2233 | num_sampled=L.num_negative_samples, |
| 2234 | num_classes=L.n_classes, |
| 2235 | ) |
| 2236 | |
| 2237 | loss = tf.reduce_sum(nce_unreduced) |
| 2238 | dLdW = tf.gradients(loss, [in_weights])[0] |
| 2239 | dLdb = tf.gradients(loss, [in_bias])[0] |
| 2240 | dLdX = tf.gradients(loss, [in_embed])[0] |
| 2241 | |
| 2242 | sampled_logits, sampled_labels = _compute_sampled_logits( |
| 2243 | weights=in_weights, |
| 2244 | biases=in_bias, |
| 2245 | labels=in_target_word, |
| 2246 | inputs=in_embed, |
| 2247 | sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob), |
| 2248 | num_sampled=L.num_negative_samples, |
| 2249 | num_classes=L.n_classes, |