Runs the GEPA optimization loop to train a new system instruction. Args: output_dir: The directory to save experiment results and artifacts. seed_instructions: Agent instructions to initialize the agent with. config: The experiment configuration. Returns: The results of the GEPA optimization.
(
output_dir: str, seed_instructions: str, config: ExperimentConfig
)
| 525 | |
| 526 | |
| 527 | def run_gepa( |
| 528 | output_dir: str, seed_instructions: str, config: ExperimentConfig |
| 529 | ) -> Any: |
| 530 | """Runs the GEPA optimization loop to train a new system instruction. |
| 531 | |
| 532 | Args: |
| 533 | output_dir: The directory to save experiment results and artifacts. |
| 534 | seed_instructions: Agent instructions to initialize the agent with. |
| 535 | config: The experiment configuration. |
| 536 | |
| 537 | Returns: |
| 538 | The results of the GEPA optimization. |
| 539 | """ |
| 540 | # This section sets up and runs the GEPA optimization experiment. |
| 541 | # Here we define all the parameters for the tau-bench environment, the GEPA |
| 542 | # optimization loop, and the models to be used. |
| 543 | datasets = _get_datasets(config) |
| 544 | training_set = [ |
| 545 | TauBenchDataInst( |
| 546 | env=config.tau_bench_env, |
| 547 | task_id=task_id, |
| 548 | task_split=config.feedback_dataset.split, |
| 549 | ) |
| 550 | for task_id in datasets['train'] |
| 551 | ] |
| 552 | eval_set = [ |
| 553 | TauBenchDataInst( |
| 554 | env=config.tau_bench_env, |
| 555 | task_id=task_id, |
| 556 | task_split=config.pareto_dataset.split, |
| 557 | ) |
| 558 | for task_id in datasets['dev'] |
| 559 | ] |
| 560 | system_instruction_name = 'system_instruction' |
| 561 | |
| 562 | tau_bench_adapter = TauBenchAdapter( |
| 563 | env_name=config.tau_bench_env, |
| 564 | agent_model=config.agent_model, |
| 565 | agent_model_provider=config.agent_model_provider, |
| 566 | user_model=config.user_model, |
| 567 | user_model_provider=config.user_model_provider, |
| 568 | agent_strategy='tool-calling', |
| 569 | user_strategy='llm', |
| 570 | system_instruction_name=system_instruction_name, |
| 571 | max_concurrency=config.max_concurrency, |
| 572 | rater=_rater(config) if config.use_rater else None, |
| 573 | log_dir=os.path.join(output_dir, 'traces'), |
| 574 | ) |
| 575 | |
| 576 | gepa_results = gepa.optimize( |
| 577 | seed_candidate={ |
| 578 | system_instruction_name: seed_instructions, |
| 579 | }, |
| 580 | trainset=training_set, |
| 581 | valset=eval_set, |
| 582 | task_lm=None, # this must be None when a custom adapter is used |
| 583 | adapter=tau_bench_adapter, |
| 584 | max_metric_calls=config.max_metric_calls, |
Nothing calls `run_gepa` directly.
No test coverage was detected for `run_gepa`.