MCPcopy Index your code
hub / github.com/anthropics/defending-code-reference-harness / grade_patch

Function grade_patch

harness/patch_grade.py:37–238  ·  view source on GitHub ↗

Grade a candidate patch against a verified crash. Spins a fresh container, applies the diff, walks the ladder. Short-circuits on the first failing tier (T0→T1→T2→re-attack). Re-attack commits the patched container to a temp image and runs a 50-turn find-agent against it; any crash fails the tier.

(
    target: TargetConfig,
    crash: CrashArtifact,
    diff: bytes | list[bytes],
    model: str,
    container_name: str = "pgrade",
    run_reattack: bool = True,
    reattack_with_diff: bool = False,
    run_style: bool = False,
    reattack_focus: str | None = None,
    agent_env: dict[str, str] | None = None,
    progress_prefix: str | None = None,
    transcript_path: str | None = None,
    system_prompt: str | None = None,
)

Source from the content-addressed store, hash-verified

35
36
37async def grade_patch(
38 target: TargetConfig,
39 crash: CrashArtifact,
40 diff: bytes | list[bytes],
41 model: str,
42 container_name: str = "pgrade",
43 run_reattack: bool = True,
44 reattack_with_diff: bool = False,
45 run_style: bool = False,
46 reattack_focus: str | None = None,
47 agent_env: dict[str, str] | None = None,
48 progress_prefix: str | None = None,
49 transcript_path: str | None = None,
50 system_prompt: str | None = None,
51) -> PatchVerdict:
52 """Grade a candidate patch against a verified crash.
53
54 Spins a fresh container, applies the diff, walks the ladder. Short-circuits
55 on the first failing tier (T0→T1→T2→re-attack). Re-attack commits the
56 patched container to a temp image and runs a 50-turn find-agent against it;
57 any crash fails the tier.
58 """
59 if not target.build_command:
60 raise ValueError(
61 f"target {target.name!r} has no build_command — patch grading "
62 f"requires an in-container rebuild step (set it in config.yaml)"
63 )
64 if crash.poc_path not in crash.reproduction_command:
65 raise ValueError(
66 f"poc_path {crash.poc_path!r} not in reproduction_command "
67 f"{crash.reproduction_command!r}"
68 )
69
70 evidence: dict[str, str] = {}
71 timings: dict[str, float] = {}
72 t0 = t1 = re_clean = False
73 t2: bool | None = None
74 t3: float | None = None
75
76 patched_tag: str | None = None
77 try:
78 # T0–T2 only run target code (apply diff, rebuild, replay PoC, test
79 # suite) via docker_ops.exec_sh — never `claude -p`. So:
80 # auth=None — don't put the API credential in the env of a
81 # container running a binary the PoC was crafted to
82 # crash.
83 # network=none — it never calls the API, so it needs no egress.
84 # Without this the sandbox default would give it
85 # vp-internal (sandboxed) or bridge (--dangerously-
86 # no-sandbox); the latter is full egress for a
87 # process fed attacker-controlled input.
88 # The re-attack and style-judge stages spawn their own containers with
89 # agent_env + network where they need it.
90 with sandbox.agent_container(
91 target.image_tag,
92 container_name,
93 None,
94 memory=target.memory_limit,

Calls 7

_clip · Function · 0.85
_verdict · Function · 0.85
_t1_passes · Function · 0.85
_tail · Function · 0.85
_focus_hint · Function · 0.85
run_find · Function · 0.85
_style_score · Function · 0.85