Run prompt through the model 2× in parallel and OR-merge the results. The second look samples the model again on the same prompt — independent sampling means borderline cases can flip between the legs, and the OR merge keeps any finding either leg surfaces. Trades higher API spend for a chance to catch findings a single sample missed.
(prompt, output_schema, *, bool_key: str, list_key: str,
thinking_budget=10000, max_tokens=16000)
| 519 | |
| 520 | |
| 521 | def _call_claude_dual_or(prompt, output_schema, *, bool_key: str, list_key: str, |
| 522 | thinking_budget=10000, max_tokens=16000): |
| 523 | """Run prompt through the model 2× in parallel and OR-merge the results. |
| 524 | |
| 525 | The second look samples the model again on the same prompt — independent |
| 526 | sampling means borderline cases can flip between the legs, and the OR |
| 527 | merge keeps any finding either leg surfaces. Trades higher API spend for |
| 528 | a chance to catch findings a single sample missed. |
| 529 | |
| 530 | bool_key/list_key name the schema's flag-field and findings-array. The |
| 531 | merge unions the two arrays (exact-dict dedup) and ORs the flag. Each leg |
| 532 | falls back to sonnet (with retries) independently if its primary call fails — |
| 533 | 529s are common under load and a single None leg would otherwise drop |
| 534 | one of the two samples on that case. Honors SECURITY_REVIEW_MODEL override |
| 535 | for both calls without fallback. |
| 536 | |
| 537 | Gated by _dual_or_enabled() — off by default to avoid the |
| 538 | 2× API cost. When disabled, short-circuits to a single _call_claude |
| 539 | and wraps the result in the same {bool_key, list_key} envelope so |
| 540 | callers don't need to branch. |
| 541 | """ |
| 542 | from concurrent.futures import ThreadPoolExecutor |
| 543 | |
| 544 | explicit = os.environ.get("SECURITY_REVIEW_MODEL", "").strip() |
| 545 | primary = explicit or SECURITY_REVIEW_MODEL |
| 546 | |
| 547 | if not _dual_or_enabled(): |
| 548 | # Single-call path. Reuse the same sonnet-fallback retry as a dual_or |
| 549 | # leg so a 529/400 on the primary doesn't drop recall to zero. |
| 550 | r = _call_claude(prompt, output_schema, thinking_budget=thinking_budget, |
| 551 | max_tokens=max_tokens, model=primary, retry_5xx=False) |
| 552 | if r is None and not explicit: |
| 553 | debug_log(f"single: {primary} failed, falling back to sonnet") |
| 554 | r = _call_claude(prompt, output_schema, thinking_budget=thinking_budget, |
| 555 | max_tokens=max_tokens, model="claude-sonnet-4-6", |
| 556 | retry_5xx=True) |
| 557 | return r |
| 558 | |
| 559 | def _leg(): |
| 560 | r = _call_claude(prompt, output_schema, thinking_budget=thinking_budget, |
| 561 | max_tokens=max_tokens, model=primary, retry_5xx=False) |
| 562 | if r is None and not explicit: |
| 563 | debug_log(f"dual_or: {primary} leg failed, falling back to sonnet") |
| 564 | r = _call_claude(prompt, output_schema, thinking_budget=thinking_budget, |
| 565 | max_tokens=max_tokens, model="claude-sonnet-4-6", |
| 566 | retry_5xx=True) |
| 567 | return r |
| 568 | |
| 569 | with ThreadPoolExecutor(max_workers=2) as ex: |
| 570 | fa = ex.submit(_leg) |
| 571 | fb = ex.submit(_leg) |
| 572 | ra, rb = fa.result(), fb.result() |
| 573 | |
| 574 | if ra is None and rb is None: |
| 575 | return None |
| 576 | |
| 577 | a_list = (ra or {}).get(list_key) or [] |
| 578 | b_list = (rb or {}).get(list_key) or [] |
no test coverage detected