(
slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"),
filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"),
shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"),
output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
model_class: str | None = typer.Option(None, "--model-class", help="Model class to use", rich_help_panel="Advanced"),
redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help="Config files (merged left to right)", rich_help_panel="Basic"),
environment_class: str | None = typer.Option(None, "--environment-class", help="Environment type (e.g., docker, singularity)", rich_help_panel="Advanced"),
)
| 120 | # fmt: off |
| 121 | @app.command(help=_HELP_TEXT) |
| 122 | def main( |
| 123 | slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"), |
| 124 | filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"), |
| 125 | shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"), |
| 126 | output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"), |
| 127 | workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"), |
| 128 | model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"), |
| 129 | model_class: str | None = typer.Option(None, "--model-class", help="Model class to use", rich_help_panel="Advanced"), |
| 130 | redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"), |
| 131 | config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help="Config files (merged left to right)", rich_help_panel="Basic"), |
| 132 | environment_class: str | None = typer.Option(None, "--environment-class", help="Environment type (e.g., docker, singularity)", rich_help_panel="Advanced"), |
| 133 | ) -> None: |
| 134 | # fmt: on |
| 135 | from programbench.utils.instance_filters import filter_instances # pylint: disable=import-error |
| 136 | from programbench.utils.load_data import load_all_instances # pylint: disable=import-error |
| 137 | |
| 138 | output_path = Path(output) if output else Path(f"programbench_results_{int(time.time())}") |
| 139 | output_path.mkdir(parents=True, exist_ok=True) |
| 140 | logger.info(f"Results will be saved to {output_path}") |
| 141 | add_file_handler(output_path / "minisweagent.log") |
| 142 | |
| 143 | instances = load_all_instances(include_tests=False) |
| 144 | instances = filter_instances(instances, filter_spec=filter_spec, slice_spec=slice_spec, shuffle=shuffle) |
| 145 | |
| 146 | if not redo_existing: |
| 147 | existing = {i["instance_id"] for i in instances if (output_path / i["instance_id"] / "submission.tar.gz").exists()} |
| 148 | if existing: |
| 149 | logger.info(f"Skipping {len(existing)} existing instances") |
| 150 | instances = [i for i in instances if i["instance_id"] not in existing] |
| 151 | |
| 152 | logger.info(f"Running on {len(instances)} instances...") |
| 153 | |
| 154 | configs = [get_config_from_spec(spec) for spec in config_spec] |
| 155 | configs.append({ |
| 156 | "environment": {"environment_class": environment_class or UNSET}, |
| 157 | "model": {"model_name": model or UNSET, "model_class": model_class or UNSET}, |
| 158 | }) |
| 159 | config = recursive_merge(*configs) |
| 160 | |
| 161 | progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{int(time.time())}.yaml") |
| 162 | |
| 163 | def process_futures(futures: dict[concurrent.futures.Future, str]): | # Wait on every future in the mapping (future -> instance id) and report failures; returns nothing.
| 164 | for future in concurrent.futures.as_completed(futures): | # futures are handled in completion order, not submission order
| 165 | try: |
| 166 | future.result() | # return value is discarded; the call only surfaces an exception raised by the task
| 167 | except concurrent.futures.CancelledError: | # a cancelled future is not treated as an error
| 168 | pass |
| 169 | except Exception as e: | # any other failure is logged and recorded, then the loop moves on to the next future
| 170 | instance_id = futures[future] | # look up which instance this future was submitted for
| 171 | logger.error(f"Error in future for instance {instance_id}: {e}", exc_info=True) | # exc_info=True attaches the traceback to the log record
| 172 | progress_manager.on_uncaught_exception(instance_id, e) | # hand the exception to the progress manager under that instance id
| 173 | |
| 174 | with Live(progress_manager.render_group, refresh_per_second=4): |
| 175 | with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor: |
| 176 | futures = { |
| 177 | executor.submit(process_instance, instance, output_path, config, progress_manager): instance[ |
| 178 | "instance_id" |
| 179 | ] |
searching dependent graphs…