MCPcopy
hub / github.com/llmware-ai/llmware / run_test

Function run_test

solutions/models/dragon_rag_benchmark_tests_llmware.py:40–76  ·  view source on GitHub ↗
(model_name, prompt_list)

Source from the content-addressed store, hash-verified

38
39# Run the benchmark test
def run_test(model_name, prompt_list):
    """Run every benchmark entry through the named model and print the results.

    Loads the model with ``Prompt().load_model(model_name)``, then, for each
    entry of ``prompt_list``, sends ``entry["query"]`` with ``entry["context"]``
    to ``prompt_main`` and prints the model response, the expected answer
    (``entry["answer"]``), the elapsed time, and the output of the three
    evidence-check helpers on the prompter.

    Args:
        model_name: Model identifier handed to ``load_model``.
        prompt_list: Iterable of mappings that provide the ``"query"``,
            ``"context"`` and ``"answer"`` keys.

    Returns:
        Always ``0``.
    """
    print(f"\n > Loading model '{model_name}'")
    prompter = Prompt().load_model(model_name)

    # NOTE: the question count in this banner is a fixed literal; it is not
    # derived from the size of prompt_list.
    print(f"\n > Running RAG Benchmark Test against '{model_name}' - 200 questions")

    # Results are reported with 1-based numbering.
    for number, sample in enumerate(prompt_list, start=1):

        started_at = time.time()

        question = sample["query"]
        passage = sample["context"]
        answer = prompter.prompt_main(
            question,
            context=passage,
            prompt_name="default_with_context",
            temperature=0.3,
        )

        # Report the answer, the reference answer and the elapsed seconds.
        elapsed = round(time.time() - started_at, 2)
        print("\n")
        print(f"{number}. llm_response - {answer['llm_response']}")
        print(f"{number}. gold_answer - {sample['answer']}")
        print(f"{number}. time_taken - {elapsed}")

        # Run all three evidence checks before printing any of their output.
        number_checks = prompter.evidence_check_numbers(answer)
        stat_checks = prompter.evidence_comparison_stats(answer)
        source_checks = prompter.evidence_check_sources(answer)

        for checked in number_checks:
            for position, fact in enumerate(checked["fact_check"]):
                print(f"{number}. fact_check - {position} {fact}")

        for checked in stat_checks:
            print(f"{number}. comparison_stats - {checked['comparison_stats']}")

        for checked in source_checks:
            for position, origin in enumerate(checked["source_review"]):
                print(f"{number}. source - {position} {origin}")

    return 0
77
78
79if __name__ == "__main__":

Calls 6

PromptClass · 0.90
prompt_mainMethod · 0.80
load_modelMethod · 0.45

Tested by

no test coverage detected