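Extract a JSON object from LLM response text. Tries ```json fences first, then bare {...} patterns; if strict parsing fails, falls back to a tolerant json_repair pass, but only when exactly one unambiguous, JSON-like object candidate is present (otherwise returns None).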
(text: str)
| 91 | |
| 92 | |
| 93 | def extract_json(text: str) -> dict | None: |
| 94 | """Extract a JSON object from LLM response text. |
| 95 | |
| 96 | Tries ```json fences first, then bare {...} patterns. |
| 97 | """ |
| 98 | m = re.search(r"```json\s*(.*?)```", text, re.DOTALL) |
| 99 | if m: |
| 100 | try: |
| 101 | return json.loads(m.group(1)) |
| 102 | except json.JSONDecodeError: |
| 103 | pass |
| 104 | m = re.search(r"\{.*\}", text, re.DOTALL) |
| 105 | if m: |
| 106 | try: |
| 107 | return json.loads(m.group(0)) |
| 108 | except json.JSONDecodeError: |
| 109 | pass |
| 110 | # Tolerant fallback for non-OpenAI backends (Claude/Qwen, …) whose free-form |
| 111 | # JSON strict json.loads rejects — unescaped ASCII quotes inside CJK string |
| 112 | # values, trailing commas, etc. Repair so the analyst's edits aren't silently |
| 113 | # dropped, but ONLY a single unambiguous object: never feed the greedy `{.*}` |
| 114 | # span or the raw text, or json_repair would quietly return one of several |
| 115 | # objects (empirically the wrong/last one) — strictly worse than None, which |
| 116 | # the caller can detect and retry/skip. |
| 117 | # |
| 118 | # Pick the candidate FIRST, before importing json_repair, so the optional |
| 119 | # dependency only matters (and only warns) when there is genuinely a single |
| 120 | # malformed object we could have repaired. Ordinary no-JSON / prose replies |
| 121 | # have no candidate and return None silently. |
| 122 | candidate = None |
| 123 | fenced = re.search(r"```json\s*(.*?)```", text, re.DOTALL) |
| 124 | if fenced and len(_top_level_brace_objects(fenced.group(1))) == 1: |
| 125 | candidate = fenced.group(1) |
| 126 | else: |
| 127 | objs = _top_level_brace_objects(text) |
| 128 | if len(objs) == 1: |
| 129 | candidate = objs[0] |
| 130 | # 0 or >1 top-level objects → too ambiguous to repair safely → None |
| 131 | if not candidate: |
| 132 | return None |
| 133 | # Final guard: only repair spans that actually look like an intended JSON |
| 134 | # object. Prose pseudo-objects in single quotes / backticks / bare text |
| 135 | # (e.g. `{op: delete}`) reach here because the scan only skips double-quoted |
| 136 | # prose; repairing them would fabricate a wrong dict (worse than None). |
| 137 | if not _looks_json_like(candidate): |
| 138 | return None |
| 139 | try: |
| 140 | from json_repair import repair_json |
| 141 | except ModuleNotFoundError: |
| 142 | warnings.warn( |
| 143 | "json_repair not installed; malformed-JSON recovery disabled — " |
| 144 | "a non-OpenAI analyst edit may be silently dropped. pip install json_repair", |
| 145 | RuntimeWarning, |
| 146 | stacklevel=2, |
| 147 | ) |
| 148 | return None |
| 149 | try: |
| 150 | repaired = repair_json(candidate, return_objects=True) |