Shared helper: set up the sim, start the server, run the rollout, and assert the results.
(
env: dict,
blocks: list,
server_model_key: str,
client_env_name_old: str,
client_env_name_new: str,
server_startup_env_var: str,
)
| 49 | |
| 50 | |
| 51 | def _run_simplerenv_eval( |
| 52 | env: dict, |
| 53 | blocks: list, |
| 54 | server_model_key: str, |
| 55 | client_env_name_old: str, |
| 56 | client_env_name_new: str, |
| 57 | server_startup_env_var: str, |
| 58 | ) -> None: |
| 59 | """Shared helper: setup sim, start server, run rollout, assert results.""" |
| 60 | # Step 1: Setup sim (shared across both benchmarks) |
| 61 | with timed("step 1: sim venv setup (setup_SimplerEnv.sh)"): |
| 62 | run_bash_blocks( |
| 63 | [find_block(blocks, "setup_SimplerEnv.sh", language="bash")], |
| 64 | cwd=REPO_ROOT, |
| 65 | env=env, |
| 66 | ) |
| 67 | |
| 68 | model_server_host = "127.0.0.1" |
| 69 | model_server_port = 5559 |
| 70 | |
| 71 | # Step 2: Server — inject test-specific flags |
| 72 | server_code = find_block(blocks, server_model_key, language="bash").code |
| 73 | server_code += f" --device cuda:0 --host {model_server_host} --port {model_server_port}" |
| 74 | |
| 75 | # Step 3: Rollout — substitute test-safe values |
| 76 | rollout_code = replace_once( |
| 77 | replace_once( |
| 78 | replace_once( |
| 79 | replace_once( |
| 80 | replace_once( |
| 81 | find_block(blocks, client_env_name_old, language="bash").code, |
| 82 | "--n-episodes 10", |
| 83 | "--n-episodes 1", |
| 84 | ), |
| 85 | "--policy-client-port 5555", |
| 86 | f"--policy-client-port {model_server_port}", |
| 87 | ), |
| 88 | "--max-episode-steps 300", |
| 89 | "--max-episode-steps 2", |
| 90 | ), |
| 91 | "--n-envs 5", |
| 92 | "--n-envs 1", |
| 93 | ), |
| 94 | client_env_name_old, |
| 95 | client_env_name_new, |
| 96 | ) |
| 97 | |
| 98 | assert_port_available(model_server_host, model_server_port) |
| 99 | model_server_proc, server_log = start_server_process(server_code, cwd=REPO_ROOT, env=env) |
| 100 | with timed("step 2: server startup"): |
| 101 | wait_for_server_ready( |
| 102 | proc=model_server_proc, |
| 103 | host=model_server_host, |
| 104 | port=model_server_port, |
| 105 | timeout_s=float(os.getenv(server_startup_env_var, str(DEFAULT_SERVER_STARTUP_SECONDS))), |
| 106 | server_log=server_log, |
| 107 | ) |
| 108 |
no test coverage detected