Evaluate the model. :param x: Input data. :param y: Labels. :param batch_size: Number of samples per batch when calling `predict` for evaluation. (default: 128) Examples:: >>> import matchzoo as mz >>> data_pack = mz.datasets.toy.load_data
(
self,
x: typing.Dict[str, np.ndarray],
y: np.ndarray,
batch_size: int = 128
)
| 277 | ) |
| 278 | |
| 279 | def evaluate( |
| 280 | self, |
| 281 | x: typing.Dict[str, np.ndarray], |
| 282 | y: np.ndarray, |
| 283 | batch_size: int = 128 |
| 284 | ) -> typing.Dict[BaseMetric, float]: |
| 285 | """ |
| 286 | Evaluate the model. |
| 287 | |
| 288 | :param x: Input data. |
| 289 | :param y: Labels. |
| 290 | :param batch_size: Number of samples when `predict` for evaluation. |
| 291 | (default: 128) |
| 292 | |
| 293 | Examples:: |
| 294 | >>> import matchzoo as mz |
| 295 | >>> data_pack = mz.datasets.toy.load_data() |
| 296 | >>> preprocessor = mz.preprocessors.NaivePreprocessor() |
| 297 | >>> data_pack = preprocessor.fit_transform(data_pack, verbose=0) |
| 298 | >>> m = mz.models.DenseBaseline() |
| 299 | >>> m.params['task'] = mz.tasks.Ranking() |
| 300 | >>> m.params['task'].metrics = [ |
| 301 | ... 'acc', 'mse', 'mae', 'ce', |
| 302 | ... 'average_precision', 'precision', 'dcg', 'ndcg', |
| 303 | ... 'mean_reciprocal_rank', 'mean_average_precision', 'mrr', |
| 304 | ... 'map', 'MAP', |
| 305 | ... mz.metrics.AveragePrecision(threshold=1), |
| 306 | ... mz.metrics.Precision(k=2, threshold=2), |
| 307 | ... mz.metrics.DiscountedCumulativeGain(k=2), |
| 308 | ... mz.metrics.NormalizedDiscountedCumulativeGain( |
| 309 | ... k=3, threshold=-1), |
| 310 | ... mz.metrics.MeanReciprocalRank(threshold=2), |
| 311 | ... mz.metrics.MeanAveragePrecision(threshold=3) |
| 312 | ... ] |
| 313 | >>> m.guess_and_fill_missing_params(verbose=0) |
| 314 | >>> m.build() |
| 315 | >>> m.compile() |
| 316 | >>> x, y = data_pack.unpack() |
| 317 | >>> evals = m.evaluate(x, y) |
| 318 | >>> type(evals) |
| 319 | <class 'dict'> |
| 320 | |
| 321 | """ |
| 322 | result = dict() |
| 323 | matchzoo_metrics, keras_metrics = self._separate_metrics() |
| 324 | y_pred = self.predict(x, batch_size) |
| 325 | |
| 326 | for metric in keras_metrics: |
| 327 | metric_func = keras.metrics.get(metric) |
| 328 | result[metric] = K.eval(K.mean( |
| 329 | metric_func(K.variable(y), K.variable(y_pred)))) |
| 330 | |
| 331 | if matchzoo_metrics: |
| 332 | if not isinstance(self.params['task'], tasks.Ranking): |
| 333 | raise ValueError("Matchzoo metrics only works on ranking.") |
| 334 | for metric in matchzoo_metrics: |
| 335 | result[metric] = self._eval_metric_on_data_frame( |
| 336 | metric, x['id_left'], y, y_pred) |