Function EVAL

vlmeval/tools.py:386–413 · view source on GitHub ↗

(dataset_name, data_file, **kwargs)

Source from the content-addressed store, hash-verified

384
385
def EVAL(dataset_name, data_file, **kwargs):
    """Evaluate a prediction file on a named dataset and log the results.

    The dataset object is built from ``dataset_name`` and its ``evaluate``
    method is called with ``data_file`` plus a set of judge keyword
    arguments assembled here.

    Args:
        dataset_name: Name handed to ``build_dataset``; also matched against
            keyword lists to choose a default judge model.
        data_file: Passed through unchanged to ``dataset.evaluate``.
        **kwargs: Only ``model`` and ``nproc`` are read. ``model`` overrides
            the default judge selection; ``nproc`` defaults to 4.

    Returns:
        Whatever ``dataset.evaluate`` returns (``None``, a ``dict`` or a
        ``pandas.DataFrame``).

    Raises:
        AssertionError: If the result is neither ``None``, a ``dict`` nor a
            ``pandas.DataFrame`` (only while assertions are enabled).
    """
    from vlmeval.dataset import build_dataset

    logger = get_logger('VLMEvalKit Tool-Eval')
    dataset = build_dataset(dataset_name)

    # Judge settings are fixed up front, before the evaluation is launched.
    judge_kwargs = {'nproc': 4, 'verbose': True}
    if 'model' in kwargs:
        # An explicit judge supplied by the caller always wins.
        judge_kwargs['model'] = kwargs['model']
    elif dataset.TYPE in ['MCQ', 'Y/N', 'MCQ_MMMU_Pro']:
        judge_kwargs['model'] = 'chatgpt-0125'
    else:
        # First matching rule wins; when nothing matches, 'model' stays unset.
        name_based_judges = (
            (['MMVet', 'LLaVABench', 'MMBench-Video'], 'gpt-4-turbo'),
            (['MMLongBench', 'MMDU'], 'gpt-4o'),
            (['DynaMath', 'MathVerse', 'MathVista', 'MathVision'], 'gpt-4o-mini'),
        )
        for keywords, judge in name_based_judges:
            if listinstr(keywords, dataset_name):
                judge_kwargs['model'] = judge
                break
    judge_kwargs['nproc'] = kwargs.get('nproc', 4)

    eval_results = dataset.evaluate(data_file, **judge_kwargs)
    if eval_results is None:
        return eval_results

    is_dict = isinstance(eval_results, dict)
    assert is_dict or isinstance(eval_results, pd.DataFrame)
    logger.info('Evaluation Results:')
    if is_dict:
        logger.info('\n' + json.dumps(eval_results, indent=4))
    elif isinstance(eval_results, pd.DataFrame):
        logger.info('\n')
        # Tables with fewer rows than columns are logged transposed.
        fewer_rows_than_cols = len(eval_results) < len(eval_results.columns)
        logger.info(tabulate(eval_results.T) if fewer_rows_than_cols else eval_results)
    return eval_results
414
415
416	def parse_args_eval():

evaluateFunction · 0.90

cliFunction · 0.85

build_datasetFunction · 0.90

listinstrFunction · 0.85

getMethod · 0.80

get_loggerFunction · 0.50

evaluateMethod · 0.45

no test coverage detected