| 328 | |
| 329 | |
def build_prompt(line, dataset_name):
    """Build the judge prompt(s) for a single evaluation record.

    Args:
        line: Mapping-like record. The keys 'criteria', 'question',
            'reference_answer_by_gpt4o' and 'prediction' are always read;
            'ground_truth' is read only when objective criteria exist.
        dataset_name: Key used to select the templates in ``prompt_dict``.
            When ``listinstr(['Creation_MMBench'], dataset_name)`` is truthy
            the name is replaced by the canonical 'Creation_MMBench'.

    Returns:
        A dict that always holds a 'subjective' prompt and additionally an
        'objective' prompt when the criteria contain an objective entry.

    Raises:
        AssertionError: If the criteria are neither a dict nor a str, or if
            no subjective criteria could be found.
        KeyError: If a required key is missing from ``line`` or
            ``prompt_dict``.
    """
    raw_criteria = line['criteria']
    try:
        # NOTE(review): eval() executes arbitrary expressions. This is only
        # acceptable while the criteria column comes from a trusted dataset;
        # consider ast.literal_eval if the field only ever holds literals.
        criteria = eval(raw_criteria)
    except Exception:
        # Not a Python expression (e.g. free-form text): use the raw value.
        # `Exception` rather than a bare except so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        criteria = raw_criteria

    if isinstance(criteria, dict):
        # Any key mentioning 'subjective' is the subjective criteria; every
        # other key is treated as the objective criteria (last one wins).
        normalized = {}
        for key, value in criteria.items():
            bucket = 'subjective' if 'subjective' in key.lower() else 'objective'
            normalized[bucket] = value
    else:
        assert isinstance(criteria, str)
        normalized = {'subjective': criteria}
    criteria = normalized
    assert 'subjective' in criteria, 'No subjective criteria found in the criteria dict'

    if listinstr(['Creation_MMBench'], dataset_name):
        dataset_name = 'Creation_MMBench'
    templates = prompt_dict[dataset_name]

    # Fields shared by every template.
    shared = {
        'instructions': line['question'],
        'reference_answer_by_gpt4o': line['reference_answer_by_gpt4o'],
        'prediction': line['prediction'],
    }

    prompts = {
        'subjective': templates['subjective'].format(
            criteria=criteria['subjective'], **shared
        ),
    }

    if 'objective' in criteria:
        # Use the ground-truth template only when a non-empty, non-NaN
        # ground truth is present in the record.
        has_ground_truth = (
            'ground_truth' in line
            and (not pd.isna(line['ground_truth']))
            and line['ground_truth'] != ''
        )
        if has_ground_truth:
            prompts['objective'] = templates['objective_with_gt'].format(
                criteria=criteria['objective'],
                groundtruth=line['ground_truth'],
                **shared
            )
        else:
            prompts['objective'] = templates['objective_without_gt'].format(
                criteria=criteria['objective'], **shared
            )
    return prompts
| 373 | |
| 374 | |
| 375 | def Generate_Creation_MMBench_judge(model, image_list, prompt): |