(body: string)
| 68 | * override or escape the system instructions. |
| 69 | */ |
export async function isSpamComment(body: string): Promise<SpamCheckResult> {
  // Whitespace-only comments are reported as not-spam without calling Bedrock.
  if (!body.trim()) {
    return { isSpam: false, reason: "Empty comment", confidence: 0 };
  }

  const client = createBedrockClient();

  // Sanitize and isolate the comment — never interpolate into the system prompt.
  const safeBody = sanitizeCommentBody(body);

  try {
    const responseBody = await retryWithBackoff(async () => {
      const command = new InvokeModelCommand({
        modelId: BEDROCK_MODEL_ID,
        contentType: "application/json",
        accept: "application/json",
        body: JSON.stringify({
          anthropic_version: "bedrock-2023-05-31",
          max_tokens: 256,
          temperature: 0.1,
          system: SPAM_DETECTION_PROMPT,
          // Comment is the sole user message — structurally isolated from instructions.
          messages: [{ role: "user", content: safeBody }],
        }),
      });
      const response = await client.send(command);
      return new TextDecoder().decode(response.body);
    });

    // The response envelope is external data: describe only the fields read
    // here and keep their values `unknown` until they are checked.
    const parsed: { content?: Array<{ type?: unknown; text?: unknown }> } =
      JSON.parse(responseBody);
    const rawText = parsed.content?.find((block) => block.type === "text")?.text;
    const text = typeof rawText === "string" ? rawText : "";

    // Greedy match from the first "{" to the last "}" so any prose the model
    // wraps around its JSON verdict is ignored.
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) throw new Error("No JSON in Bedrock response");

    const result: { is_spam?: unknown; confidence?: unknown; reason?: unknown } =
      JSON.parse(jsonMatch[0]);

    // The verdict is model output, so its field types are not guaranteed.
    // Number() is the conversion `>=` performs against a numeric threshold, so
    // coercing first keeps the spam decision the same for finite values while
    // ensuring the returned confidence is a real number. Missing, unparseable
    // or non-finite values count as 0.
    const numericConfidence = Number(result.confidence ?? 0);
    const confidence = Number.isFinite(numericConfidence) ? numericConfidence : 0;
    const isSpam = result.is_spam === true && confidence >= CONFIDENCE_THRESHOLD;

    // Only pass a reason through when it is actually a string.
    const reason = typeof result.reason === "string" ? result.reason : "";

    return { isSpam, reason, confidence };
  } catch (err: unknown) {
    // Deliberate fail-open: any error raised inside the try block (Bedrock
    // call, decoding, parsing) is logged and the comment is treated as not-spam.
    console.warn("Bedrock spam check failed, skipping:", err);
    return { isSpam: false, reason: "Bedrock check failed", confidence: 0 };
  }
}
| 114 | |
| 115 | /** |
| 116 | * Run a second independent Bedrock call to confirm a spam verdict. |
no test coverage detected