| 234 | } |
| 235 | |
| 236 | async function fetchCandidateSessions(args: Args): Promise<CandidateRow[]> { |
| 237 | const targetStructs = TARGETS.flatMap((target) => |
| 238 | target.modelIds.map((modelId) => ({ |
| 239 | agent_id: target.agentId, |
| 240 | model_id: modelId, |
| 241 | })), |
| 242 | ) |
| 243 | |
| 244 | const datePredicate = |
| 245 | args.startDate || args.endDate |
| 246 | ? ` |
| 247 | ${args.startDate ? 'AND m.finished_at >= TIMESTAMP(@startDate)' : ''} |
| 248 | ${args.endDate ? 'AND m.finished_at < TIMESTAMP(@endDate)' : ''} |
| 249 | ` |
| 250 | : ` |
| 251 | AND m.finished_at >= TIMESTAMP_SUB( |
| 252 | TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @beforeDays DAY), |
| 253 | INTERVAL @lookbackDays DAY |
| 254 | ) |
| 255 | AND m.finished_at < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @beforeDays DAY) |
| 256 | ` |
| 257 | |
| 258 | const isEvenTotalSample = args.totalSessions && args.sampleMode === 'even' |
| 259 | const samplingCtes = isEvenTotalSample |
| 260 | ? ` |
| 261 | ranked_by_time AS ( |
| 262 | SELECT |
| 263 | *, |
| 264 | ROW_NUMBER() OVER (ORDER BY last_finished_at DESC, max_message_count DESC) AS time_rank, |
| 265 | COUNT(*) OVER () AS total_count |
| 266 | FROM session_summary |
| 267 | ), |
| 268 | bucketed AS ( |
| 269 | SELECT |
| 270 | *, |
| 271 | CAST(FLOOR((time_rank - 1) * @sessionLimit / total_count) AS INT64) AS sample_bucket |
| 272 | FROM ranked_by_time |
| 273 | ), |
| 274 | ranked AS ( |
| 275 | SELECT |
| 276 | *, |
| 277 | ROW_NUMBER() OVER ( |
| 278 | PARTITION BY sample_bucket |
| 279 | ORDER BY RAND() |
| 280 | ) AS target_rank |
| 281 | FROM bucketed |
| 282 | ) |
| 283 | ` |
| 284 | : ` |
| 285 | ranked AS ( |
| 286 | SELECT |
| 287 | *, |
| 288 | ${ |
| 289 | args.totalSessions |
| 290 | ? `ROW_NUMBER() OVER ( |
| 291 | ORDER BY ${ |
| 292 | args.sampleMode === 'random' |
| 293 | ? 'RAND()' |