hub / github.com/SWE-agent/mini-swe-agent / main

Function main

src/minisweagent/run/benchmarks/swebench.py:202–271 · view source on GitHub ↗

(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
    split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
    slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"),
    filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"),
    shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"),
    output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
    workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
    model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
    model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
    config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help=_CONFIG_SPEC_HELP_TEXT, rich_help_panel="Basic"),
    environment_class: str | None = typer.Option(None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
)

Source from the content-addressed store, hash-verified

200	# fmt: off
201	@app.command(help=_HELP_TEXT)
202	def main(
203	subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
204	split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
205	slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"),
206	filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"),
207	shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"),
208	output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
209	workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
210	model: str \| None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
211	model_class: str \| None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
212	redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
213	config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help=_CONFIG_SPEC_HELP_TEXT, rich_help_panel="Basic"),
214	environment_class: str \| None = typer.Option(None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
215	) -> None:
216	# fmt: on
217	output_path = Path(output)
218	output_path.mkdir(parents=True, exist_ok=True)
219	logger.info(f"Results will be saved to {output_path}")
220	add_file_handler(output_path / "minisweagent.log")
221
222	from datasets import load_dataset
223
224	dataset_path = DATASET_MAPPING.get(subset, subset)
225	logger.info(f"Loading dataset {dataset_path}, split {split}...")
226	instances = list(load_dataset(dataset_path, split=split))
227
228	instances = filter_instances(instances, filter_spec=filter_spec, slice_spec=slice_spec, shuffle=shuffle)
229	if not redo_existing and (output_path / "preds.json").exists():
230	existing_instances = list(json.loads((output_path / "preds.json").read_text()).keys())
231	logger.info(f"Skipping {len(existing_instances)} existing instances")
232	instances = [instance for instance in instances if instance["instance_id"] not in existing_instances]
233	logger.info(f"Running on {len(instances)} instances...")
234
235	logger.info(f"Building agent config from specs: {config_spec}")
236	configs = [get_config_from_spec(spec) for spec in config_spec]
237	configs.append({
238	"environment": {"environment_class": environment_class or UNSET},
239	"model": {"model_name": model or UNSET, "model_class": model_class or UNSET},
240	})
241	config = recursive_merge(*configs)
242
243	progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
244
245	def process_futures(futures: dict[concurrent.futures.Future, str]):
246	for future in concurrent.futures.as_completed(futures):
247	try:
248	future.result()
249	except concurrent.futures.CancelledError:
250	pass
251	except Exception as e:
252	instance_id = futures[future]
253	logger.error(f"Error in future for instance {instance_id}: {e}", exc_info=True)
254	progress_manager.on_uncaught_exception(instance_id, e)
255
256	with Live(progress_manager.render_group, refresh_per_second=4):
257	with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
258	futures = {
259	executor.submit(process_instance, instance, output_path, config, progress_manager): instance[

Callers (6)

test_swebench_end_to_end (Function) · 0.90

test_redo_existing_false_skips_existing (Function) · 0.90

test_redo_existing_true_overwrites_existing (Function) · 0.90

test_exception_handling_in_agent_run (Function) · 0.90

test_different_exception_types (Function) · 0.90

test_exception_handling_with_progress_manager (Function) · 0.90

Calls (6)

add_file_handler (Function) · 0.90

get_config_from_spec (Function) · 0.90

recursive_merge (Function) · 0.90

RunBatchProgressManager (Class) · 0.90

filter_instances (Function) · 0.85

process_futures (Function) · 0.70

Tested by (6)

test_swebench_end_to_end (Function) · 0.72

test_redo_existing_false_skips_existing (Function) · 0.72

test_redo_existing_true_overwrites_existing (Function) · 0.72

test_exception_handling_in_agent_run (Function) · 0.72

test_different_exception_types (Function) · 0.72

test_exception_handling_with_progress_manager (Function) · 0.72

Used in the wild: real call sites across dependent graphs

searching dependent graphs…