(mod="gpt-3.5-turbo")
| 380 | |
| 381 | |
async def eval(mod="gpt-3.5-turbo"):
    """Run the published ``eval:v0`` dataset through an ``OpenUIModel``.

    Steps, in order: initialise the ``openui-dev`` weave project, build an
    ``OpenUIModel`` from ``SYSTEM_PROMPT`` and the requested model name,
    fetch the dataset referenced as ``eval:v0``, and await an ``Evaluation``
    scored by ``scores``.

    Args:
        mod: Model name forwarded to ``OpenUIModel`` as ``model_name``.

    Returns:
        None. The value produced by ``Evaluation.evaluate`` is awaited but
        not returned to the caller.
    """
    # NOTE(review): `pt` is defined elsewhere in the file; it appears to be
    # a progress-message helper -- confirm.
    pt("Initializing weave")
    weave.init("openui-dev")
    ui_model = OpenUIModel(model_name=mod, prompt_template=SYSTEM_PROMPT)

    pt("Loading dataset")
    eval_rows = weave.ref("eval:v0").get()
    # Inline alternative to the published dataset, kept for reference:
    # eval_rows = Dataset(
    #     name="eval",
    #     rows=[{"prompt": "Make a cool SaaS landing page for an AI startup"}],
    # )

    runner = Evaluation(scorers=[scores], dataset=eval_rows)
    pt("Running evaluation")
    await runner.evaluate(ui_model)
| 398 | |
| 399 | |
| 400 | def run_prompt_search(mod: str): |
no test coverage detected