| 1338 | return chunks |
| 1339 | |
def warm_cache(self, chunks):
    """Schedule background keep-alive requests for the cacheable prompt.

    Does nothing unless ``self.add_cache_headers``,
    ``self.num_cache_warming_pings`` and ``self.ok_to_warm_cache`` are all
    truthy. Otherwise it (re)arms the warming state -- next ping time,
    remaining ping count and the chunks to resend -- and, if no warming
    thread exists yet, starts a daemon thread that performs the pings.

    The interval between pings defaults to 295 seconds and can be
    overridden through the ``AIDER_CACHE_KEEPALIVE_DELAY`` environment
    variable (parsed with ``float``; an unparsable value raises
    ``ValueError``).

    Args:
        chunks: Object exposing ``cacheable_messages()``; its messages are
            resent on every warming ping.

    Returns:
        ``chunks`` when a new warming thread was started by this call,
        otherwise ``None`` (warming disabled, or a thread already exists
        and only its schedule was refreshed).
    """
    if not self.add_cache_headers:
        return
    if not self.num_cache_warming_pings:
        return
    if not self.ok_to_warm_cache:
        return

    # Default: 5 seconds short of 5 minutes.
    # NOTE(review): presumably chosen to land just inside a 5-minute
    # provider-side cache lifetime -- confirm against the provider docs.
    delay = 5 * 60 - 5
    delay = float(os.environ.get("AIDER_CACHE_KEEPALIVE_DELAY", delay))
    self.next_cache_warm = time.time() + delay
    self.warming_pings_left = self.num_cache_warming_pings
    self.cache_warming_chunks = chunks

    # An existing worker picks up the refreshed state above on its next
    # poll; note that it keeps using the delay captured when it was started.
    if self.cache_warming_thread:
        return

    def warm_cache_worker():
        """Poll once per second and send a ping whenever one is due."""
        while self.ok_to_warm_cache:
            time.sleep(1)
            if self.warming_pings_left <= 0:
                continue
            if time.time() < self.next_cache_warm:
                continue

            # Consume the ping and reschedule before the request so that a
            # failing request is not retried on every poll.
            self.warming_pings_left -= 1
            self.next_cache_warm = time.time() + delay

            # extra_params may be None: apply the fallback *before* copying.
            # The previous `dict(extra_params) or dict()` raised TypeError
            # on None, which silently killed this thread for good.
            kwargs = dict(self.main_model.extra_params or {})
            # A single output token keeps the ping as cheap as possible.
            kwargs["max_tokens"] = 1

            try:
                completion = litellm.completion(
                    model=self.main_model.name,
                    messages=self.cache_warming_chunks.cacheable_messages(),
                    stream=False,
                    **kwargs,
                )
            except Exception as err:
                # Best effort: report the failure and wait for the next slot.
                self.io.tool_warning(f"Cache warming error: {err}")
                continue

            # The cache-hit count is read from whichever of these two usage
            # attributes is present and non-zero.
            cache_hit_tokens = getattr(
                completion.usage, "prompt_cache_hit_tokens", 0
            ) or getattr(completion.usage, "cache_read_input_tokens", 0)

            if self.verbose:
                self.io.tool_output(f"Warmed {format_tokens(cache_hit_tokens)} cached tokens.")

    # Timer with a zero interval runs the worker immediately on its own thread.
    self.cache_warming_thread = threading.Timer(0, warm_cache_worker)
    self.cache_warming_thread.daemon = True
    self.cache_warming_thread.start()

    return chunks
| 1395 | |
| 1396 | def check_tokens(self, messages): |
| 1397 | """Check if the messages will fit within the model's token limits.""" |