(
subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"),
filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"),
shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"),
output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help=_CONFIG_SPEC_HELP_TEXT, rich_help_panel="Basic"),
environment_class: str | None = typer.Option(None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
)
| 200 | # fmt: off |
| 201 | @app.command(help=_HELP_TEXT) |
| 202 | def main( |
| 203 | subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"), |
| 204 | split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"), |
| 205 | slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"), |
| 206 | filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"), |
| 207 | shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"), |
| 208 | output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"), |
| 209 | workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"), |
| 210 | model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"), |
| 211 | model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"), |
| 212 | redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"), |
| 213 | config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help=_CONFIG_SPEC_HELP_TEXT, rich_help_panel="Basic"), |
| 214 | environment_class: str | None = typer.Option(None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"), |
| 215 | ) -> None: |
| 216 | # fmt: on |
| 217 | output_path = Path(output) |
| 218 | output_path.mkdir(parents=True, exist_ok=True) |
| 219 | logger.info(f"Results will be saved to {output_path}") |
| 220 | add_file_handler(output_path / "minisweagent.log") |
| 221 | |
| 222 | from datasets import load_dataset |
| 223 | |
| 224 | dataset_path = DATASET_MAPPING.get(subset, subset) |
| 225 | logger.info(f"Loading dataset {dataset_path}, split {split}...") |
| 226 | instances = list(load_dataset(dataset_path, split=split)) |
| 227 | |
| 228 | instances = filter_instances(instances, filter_spec=filter_spec, slice_spec=slice_spec, shuffle=shuffle) |
| 229 | if not redo_existing and (output_path / "preds.json").exists(): |
| 230 | existing_instances = list(json.loads((output_path / "preds.json").read_text()).keys()) |
| 231 | logger.info(f"Skipping {len(existing_instances)} existing instances") |
| 232 | instances = [instance for instance in instances if instance["instance_id"] not in existing_instances] |
| 233 | logger.info(f"Running on {len(instances)} instances...") |
| 234 | |
| 235 | logger.info(f"Building agent config from specs: {config_spec}") |
| 236 | configs = [get_config_from_spec(spec) for spec in config_spec] |
| 237 | configs.append({ |
| 238 | "environment": {"environment_class": environment_class or UNSET}, |
| 239 | "model": {"model_name": model or UNSET, "model_class": model_class or UNSET}, |
| 240 | }) |
| 241 | config = recursive_merge(*configs) |
| 242 | |
| 243 | progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml") |
| 244 | |
def process_futures(futures: dict[concurrent.futures.Future, str]):
    """Drain ``futures`` in completion order and report the ones that failed.

    Args:
        futures: Maps each submitted future to the instance ID string used
            when reporting a failure for it.

    Cancelled futures are ignored. Any other exception raised by a future is
    logged with its traceback and passed to
    ``progress_manager.on_uncaught_exception``; it is not re-raised.

    Uses ``logger`` and ``progress_manager`` from the enclosing scope.
    """
    for finished in concurrent.futures.as_completed(futures):
        try:
            finished.result()
        except concurrent.futures.CancelledError:
            # A cancelled future is not treated as a failure.
            continue
        except Exception as exc:
            failed_id = futures[finished]
            logger.error(f"Error in future for instance {failed_id}: {exc}", exc_info=True)
            progress_manager.on_uncaught_exception(failed_id, exc)
| 255 | |
| 256 | with Live(progress_manager.render_group, refresh_per_second=4): |
| 257 | with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor: |
| 258 | futures = { |
| 259 | executor.submit(process_instance, instance, output_path, config, progress_manager): instance[ |
searching dependent graphs…