* Fires when the in-window failure rate meets the threshold with at least SIM_MIN_EXECUTIONS_FOR_RATE_RULES executions. Intentionally diverges from the legacy notification rule, which required the oldest in-window log to predate the window start — a condition that is false for every in-window log, making the legacy rule dead code.
( workflowId: string, ratePercent: number, windowHours: number )
| 43 | * false for every in-window log, making the legacy rule dead code. |
| 44 | */ |
async function checkFailureRate(
  workflowId: string,
  ratePercent: number,
  windowHours: number
): Promise<boolean> {
  // Start of the look-back window, measured back from the current time.
  const windowMs = windowHours * 60 * 60 * 1000
  const since = new Date(Date.now() - windowMs)

  // One aggregate query returns both counts. The window length is
  // user-configured and this check runs on the execution-completion path, so
  // the matching rows are counted in the database rather than loaded here.
  const [tally] = await db
    .select({
      total: count(),
      // count() skips NULLs, so only rows whose level is 'error' are counted.
      errors: count(sql`case when ${workflowExecutionLogs.level} = 'error' then 1 end`),
    })
    .from(workflowExecutionLogs)
    .where(
      and(
        eq(workflowExecutionLogs.workflowId, workflowId),
        gte(workflowExecutionLogs.startedAt, since),
        excludeSimExecutionsCondition()
      )
    )

  // Below the minimum sample size the rule never fires; this guard also runs
  // before the division below.
  const executions = tally?.total ?? 0
  if (executions < SIM_MIN_EXECUTIONS_FOR_RATE_RULES) {
    return false
  }

  const failures = tally?.errors ?? 0
  const failurePercent = (failures / executions) * 100
  return failurePercent >= ratePercent
}
| 76 | |
| 77 | async function checkLatencySpike( |
| 78 | workflowId: string, |
no test coverage detected