MCPcopy
hub / github.com/open-compass/VLMEvalKit / EVAL

Function EVAL

vlmeval/tools.py:386–413  ·  view source on GitHub ↗
(dataset_name, data_file, **kwargs)

Source from the content-addressed store, hash-verified

384
385
def EVAL(dataset_name, data_file, **kwargs):
    """Evaluate ``data_file`` with the dataset named ``dataset_name`` and log the result.

    The dataset object is obtained from ``build_dataset(dataset_name)`` and its
    ``evaluate`` method is called with ``data_file`` plus a small set of judge
    options (``nproc``, ``verbose`` and, when one can be chosen, ``model``).

    Judge model selection, in priority order:
      1. ``kwargs['model']`` when the caller supplies it.
      2. ``'chatgpt-0125'`` when ``dataset.TYPE`` is ``'MCQ'``, ``'Y/N'`` or
         ``'MCQ_MMMU_Pro'``.
      3. The first rule in ``name_rules`` for which ``listinstr`` reports a
         match on ``dataset_name`` (presumably a substring test; confirm
         against ``listinstr``).
      4. Otherwise no ``model`` key is passed to ``dataset.evaluate``.

    Args:
        dataset_name: Name handed to ``build_dataset`` and used for the
            name-based judge rules.
        data_file: Forwarded unchanged as the first argument of
            ``dataset.evaluate`` (presumably the prediction file; verify
            against callers).
        **kwargs: Only ``model`` and ``nproc`` are read; ``nproc`` defaults
            to 4. Any other keys are ignored.

    Returns:
        Whatever ``dataset.evaluate`` returned: ``None``, a ``dict`` or a
        ``pandas.DataFrame``.

    Raises:
        AssertionError: If the evaluation result is neither ``None``, a
            ``dict`` nor a ``pandas.DataFrame`` (only while assertions are
            enabled).
    """
    from vlmeval.dataset import build_dataset

    logger = get_logger('VLMEvalKit Tool-Eval')
    dataset = build_dataset(dataset_name)

    # Name-based fallback judges; tried top to bottom, first match wins.
    name_rules = (
        (['MMVet', 'LLaVABench', 'MMBench-Video'], 'gpt-4-turbo'),
        (['MMLongBench', 'MMDU'], 'gpt-4o'),
        (['DynaMath', 'MathVerse', 'MathVista', 'MathVision'], 'gpt-4o-mini'),
    )

    # Judge options are settled completely before the evaluation is started.
    judge_kwargs = {'nproc': kwargs.get('nproc', 4), 'verbose': True}
    if 'model' in kwargs:
        judge_kwargs['model'] = kwargs['model']
    elif dataset.TYPE in ['MCQ', 'Y/N', 'MCQ_MMMU_Pro']:
        judge_kwargs['model'] = 'chatgpt-0125'
    else:
        for keywords, judge_name in name_rules:
            if listinstr(keywords, dataset_name):
                judge_kwargs['model'] = judge_name
                break

    eval_results = dataset.evaluate(data_file, **judge_kwargs)
    if eval_results is None:
        # Nothing to report; hand the None straight back to the caller.
        return eval_results

    assert isinstance(eval_results, (dict, pd.DataFrame))
    logger.info('Evaluation Results:')
    if isinstance(eval_results, dict):
        logger.info('\n' + json.dumps(eval_results, indent=4))
    elif isinstance(eval_results, pd.DataFrame):
        logger.info('\n')
        # Frames with fewer rows than columns are transposed and rendered
        # through tabulate; all other frames are logged as they are.
        if len(eval_results) < len(eval_results.columns):
            report = tabulate(eval_results.T)
        else:
            report = eval_results
        logger.info(report)
    return eval_results
414
415
416def parse_args_eval():

Callers 2

evaluate (Function) · 0.90
cli (Function) · 0.85

Calls 5

build_dataset (Function) · 0.90
listinstr (Function) · 0.85
get (Method) · 0.80
get_logger (Function) · 0.50
evaluate (Method) · 0.45

Tested by

no test coverage detected