Run OpenCode for *question* and return a benchmark result.
(self, question: str)
| 616 | ) |
| 617 | |
| 618 | async def run(self, question: str) -> AgentResult: |
| 619 | """Run OpenCode for *question* and return a benchmark result.""" |
| 620 | with agent_run_span( |
| 621 | "opencode-agent", model=self._model_id, question=question |
| 622 | ) as span: |
| 623 | run_started = time.perf_counter() |
| 624 | started_at = _dt.datetime.now(_dt.UTC).isoformat() |
| 625 | |
| 626 | cmd = [ |
| 627 | self._opencode_bin, |
| 628 | "run", |
| 629 | "--pure", |
| 630 | "--format", |
| 631 | "json", |
| 632 | "--model", |
| 633 | self._opencode_model, |
| 634 | "--agent", |
| 635 | self._agent_name, |
| 636 | "--dir", |
| 637 | str(self._run_dir), |
| 638 | "--title", |
| 639 | "AssetOpsBench", |
| 640 | ] |
| 641 | if self._attach: |
| 642 | cmd.extend(["--attach", self._attach]) |
| 643 | if self._variant: |
| 644 | cmd.extend(["--variant", self._variant]) |
| 645 | if self._thinking: |
| 646 | cmd.append("--thinking") |
| 647 | if self._dangerously_skip_permissions: |
| 648 | cmd.append("--dangerously-skip-permissions") |
| 649 | cmd.append(question) |
| 650 | |
| 651 | env = os.environ.copy() |
| 652 | # The OpenCode subprocess should not expose host-side evaluation |
| 653 | # output paths to file/bash tools. The Python wrapper persists the |
| 654 | # trajectory after OpenCode exits, using the parent process env. |
| 655 | env.pop("AGENT_TRAJECTORY_DIR", None) |
| 656 | env.pop("SCENARIOS_DATA_DIR", None) |
| 657 | env.update(self._env_overrides) |
| 658 | env["OPENCODE_CONFIG_CONTENT"] = json.dumps(self._config) |
| 659 | env.setdefault("OPENCODE_DISABLE_AUTOUPDATE", "true") |
| 660 | env.setdefault("NO_COLOR", "1") |
| 661 | |
| 662 | _log.info( |
| 663 | "OpenCodeAgentRunner: starting query (model=%s, opencode_model=%s)", |
| 664 | self._model_id, |
| 665 | self._opencode_model, |
| 666 | ) |
| 667 | proc = await asyncio.create_subprocess_exec( |
| 668 | *cmd, |
| 669 | cwd=str(self._run_dir), |
| 670 | env=env, |
| 671 | stdout=asyncio.subprocess.PIPE, |
| 672 | stderr=asyncio.subprocess.PIPE, |
| 673 | ) |
| 674 | try: |
| 675 | stdout_b, stderr_b = await asyncio.wait_for( |
no test coverage detected