(N=1)
| 239 | |
| 240 | |
| 241 | def test_NCELoss(N=1): |
| 242 | from numpy_ml.neural_nets.losses import NCELoss |
| 243 | from numpy_ml.utils.data_structures import DiscreteSampler |
| 244 | |
| 245 | np.random.seed(12345) |
| 246 | |
| 247 | N = np.inf if N is None else N |
| 248 | |
| 249 | i = 1 |
| 250 | while i < N + 1: |
| 251 | n_ex = np.random.randint(1, 10) |
| 252 | n_c = np.random.randint(1, 10) |
| 253 | n_out = np.random.randint(1, 300) |
| 254 | vocab_size = np.random.randint(200, 1000) |
| 255 | num_negative_samples = np.random.randint(1, 10) |
| 256 | |
| 257 | embeddings = random_tensor((n_ex, n_c, n_out), standardize=True) |
| 258 | target = np.random.randint(0, vocab_size, (n_ex, 1)) |
| 259 | |
| 260 | probs = np.random.rand(vocab_size) |
| 261 | probs /= probs.sum() |
| 262 | |
| 263 | D = DiscreteSampler(probs, log=False, with_replacement=False) |
| 264 | NCE = NCELoss(vocab_size, D, num_negative_samples) |
| 265 | my_loss, _ = NCE(embeddings, target.flatten()) |
| 266 | |
| 267 | my_dLdX = NCE.grad(update_params=False) |
| 268 | my_dLdW = NCE.gradients["W"] |
| 269 | my_dLdb = NCE.gradients["b"] |
| 270 | |
| 271 | NCE.gradients["W"] = np.zeros_like(NCE.parameters["W"]) |
| 272 | NCE.gradients["b"] = np.zeros_like(NCE.parameters["b"]) |
| 273 | |
| 274 | MY_final_loss, TF_final_loss = 0, 0 |
| 275 | MY_dLdX, TF_dLdX = np.zeros_like(embeddings), np.zeros_like(embeddings) |
| 276 | TF_dLdW, TF_dLdb = ( |
| 277 | np.zeros_like(NCE.parameters["W"]), |
| 278 | np.zeros_like(NCE.parameters["b"]), |
| 279 | ) |
| 280 | |
| 281 | # XXX: instead of calculating the tf NCE on the entire batch, we |
| 282 | # calculate it per-example and then sum. this is really lame and should |
| 283 | # be changed to operate on batches. |
| 284 | nv = NCE.derived_variables["noise_samples"][0] |
| 285 | for ix, emb in enumerate(embeddings): |
| 286 | sv = (nv[0], np.array([nv[1][0, ix]]), nv[2]) |
| 287 | |
| 288 | NCE.X = [] |
| 289 | for k, v in NCE.derived_variables.items(): |
| 290 | NCE.derived_variables[k] = [] |
| 291 | |
| 292 | for k, v in NCE.gradients.items(): |
| 293 | NCE.gradients[k] = np.zeros_like(v) |
| 294 | |
| 295 | my, _ = NCE(emb[None, :, :], target[ix], neg_samples=sv[0]) |
| 296 | |
| 297 | NCE.derived_variables["noise_samples"] = [sv] |
| 298 | dldx = NCE.grad(update_params=False) |
nothing calls this directly
no test coverage detected