Evaluate performance on a given split.
eval_split(split, dp, model, params, misc, **kwargs)
| 10 | return GenericBatchGenerator |
| 11 | |
def eval_split(split, dp, model, params, misc, **kwargs):
    """Evaluate the model's perplexity on a given split.

    Iterates over image-sentence pair batches of ``split``, runs the batch
    generator forward in predict mode, and accumulates the negative log2
    probability that the softmax over each output row assigns to the
    corresponding ground-truth token.

    Args:
        split: name of the split, passed through to
            ``dp.iterImageSentencePairBatch``.
        dp: data provider exposing ``iterImageSentencePairBatch``.
        model: model object, passed through to the generator's ``forward``.
        params: dict of options; ``eval_batch_size`` (default 100) and
            ``eval_max_images`` (default -1) are read from it.
        misc: dict that must contain ``'wordtoix'`` (token -> index map).
        **kwargs: ``eval_batch_size`` / ``eval_max_images`` given here take
            precedence over the values in ``params``.

    Returns:
        The perplexity ``2 ** (total_neg_log2_prob / total_tokens)``, or NaN
        when no tokens were evaluated (e.g. the split produced no batches),
        since the perplexity is undefined in that case.
    """
    # allow kwargs to override what is inside params
    eval_batch_size = kwargs.get('eval_batch_size', params.get('eval_batch_size', 100))
    eval_max_images = kwargs.get('eval_max_images', params.get('eval_max_images', -1))
    BatchGenerator = decodeGenerator(params)
    wordtoix = misc['wordtoix']

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('evaluating %s performance in batches of %d' % (split, eval_batch_size))
    logppl = 0.0  # running sum of -log2 p(ground-truth token)
    logppln = 0   # number of tokens contributing to logppl
    nsent = 0     # number of sentences evaluated
    batches = dp.iterImageSentencePairBatch(
        split=split,
        max_batch_size=eval_batch_size,
        max_images=eval_max_images,
    )
    for batch in batches:
        # forward also returns a cache as its second value; it is not needed here
        Ys, _ = BatchGenerator.forward(batch, model, params, misc, predict_mode=True)

        for i, pair in enumerate(batch):
            # tokens missing from the vocabulary are skipped
            gtix = [wordtoix[w] for w in pair['sentence']['tokens'] if w in wordtoix]
            gtix.append(0)  # we expect END token at the end
            Y = Ys[i]
            # row-wise softmax; subtract the row max first for numerical stability
            maxes = np.amax(Y, axis=1, keepdims=True)
            e = np.exp(Y - maxes)
            P = e / np.sum(e, axis=1, keepdims=True)
            # pick, for each row t, the probability of ground-truth token gtix[t];
            # the 1e-20 floor keeps log2 finite when a probability underflows to 0
            rows = np.arange(len(gtix))
            logppl += -np.sum(np.log2(1e-20 + P[rows, gtix]))
            logppln += len(gtix)
            nsent += 1

    if logppln == 0:
        # Nothing was evaluated: avoid an opaque ZeroDivisionError and report
        # the perplexity as undefined instead.
        ppl2 = float('nan')
    else:
        ppl2 = 2 ** (logppl / logppln)
    print('evaluated %d sentences and got perplexity = %f' % (nsent, ppl2))
    return ppl2  # return the perplexity
no test coverage detected