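Decode a stored ``parse_engine`` field into ``(engine, params, errors)``. ``value`` may be a bare engine (``mineru``) or an encoded directive (``mineru(page_range=1-3,language=en)``). ``engine`` is the bare, normalised engine name; ``params`` is the canonical coerced dict; ``errors`` is non-empty for a malformed/unbalanced block or invalid params (callers on the parse path raise so the doc fails visibly instead of dropping params).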
(
value: Any,
)
| 106 | |
| 107 | |
def decode_parse_engine(
    value: Any,
) -> tuple[str, dict[str, Any], list[str]]:
    """Split a stored ``parse_engine`` value into ``(engine, params, errors)``.

    ``value`` is either a bare engine name (``mineru``) or an encoded
    directive such as ``mineru(page_range=1-3,language=en)``.

    Returns:
        A 3-tuple ``(engine, params, errors)``:

        * ``engine`` -- the text before the first ``(`` (or the whole value
          when there is no ``(``), passed through ``normalize_parser_engine``;
          ``""`` when ``value`` is falsy or blank.
        * ``params`` -- the canonical coerced dict returned by
          ``parse_engine_params``; ``{}`` for a bare engine or when the
          parenthesised block is structurally malformed.
        * ``errors`` -- non-empty when the ``(`` is unbalanced, when text
          follows the closing ``)``, or when ``parse_engine_params`` reports
          invalid params. Callers on the parse path raise on these so the
          doc fails visibly instead of silently dropping params.
    """
    text = str(value or "").strip()
    if not text:
        return "", {}, []

    # Everything before the first "(" is the engine name; ``paren`` is empty
    # when the value carries no parameter block at all.
    name_part, paren, _ = text.partition("(")
    if not paren:
        return normalize_parser_engine(text), {}, []

    engine = normalize_parser_engine(name_part)
    label = f"parse_engine {text!r}"

    # len(name_part) is the index of the opening "(" within ``text``.
    block, resume = take_paren_block(text, len(name_part))

    if block is None:
        problem = f"unbalanced '(' in {label}"
    elif text[resume:].strip():
        problem = f"unexpected text after ')' in {label}"
    else:
        params, errors = parse_engine_params(block, engine=engine, label=label)
        return engine, params, errors

    return engine, {}, [problem]
| 135 | |
| 136 | |
| 137 | # --------------------------------------------------------------------------- |