* Detect PII spans via the Presidio analyzer. An empty `entityTypes` ⇒ detect all. * Throws on transport/HTTP failure so callers can apply their own fail-safe.
( text: string, entityTypes: string[], language: string )
| 52 | * Throws on transport/HTTP failure so callers can apply their own fail-safe. |
| 53 | */ |
async function analyze(
  text: string,
  entityTypes: string[],
  language: string
): Promise<AnalyzerSpan[]> {
  // An empty filter means "detect everything": leave `entities` out of the
  // request payload entirely instead of sending an empty list.
  const entities = entityTypes.length > 0 ? entityTypes : undefined

  // boundary-raw-fetch: internal call to the Presidio analyzer service via PII_URL
  const response = await fetch(`${PII_URL}/analyze`, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ text, language, ...(entities ? { entities } : {}) }),
  })
  if (!response.ok) {
    // Best-effort read of the error body; a failed read must not mask the
    // HTTP failure, and the detail is truncated to keep the message bounded.
    const detail = await response.text().catch(() => '')
    throw new Error(`Presidio analyze failed (${response.status}): ${detail.slice(0, 200)}`)
  }

  // Check the payload shape before handing it to callers: a successful status
  // with a non-array body would otherwise surface as a confusing failure far
  // from this call site. Throwing keeps the caller's fail-safe path in charge.
  const payload: unknown = await response.json()
  if (!Array.isArray(payload)) {
    throw new Error('Presidio analyze returned a non-array response')
  }
  return payload as AnalyzerSpan[]
}
| 73 | |
| 74 | /** |
| 75 | * Detect PII spans for many texts in a single analyzer pass (spaCy `nlp.pipe`), |