Update current running list and done list
(self)
| 378 | self.running_bb.append_batch_tokens(sample_tokens) |
| 379 | |
| 380 | def update(self): |
| 381 | """ |
| 382 | Merge a non-empty prefill batch into the running batch, then retire finished sequences: pop them from the running batch, remove them from the running list, add them to the done list, and return them. |
| 383 | """ |
| 384 | if not self.prefill_bb.is_empty: |
| 385 | self.running_list.move_prefill_to_decoding(self.prefill_bb.seqs_ids) |
| 386 | self.running_bb.merge(self.prefill_bb) |
| 387 | # clear the prefill batch without assigning a free_block_tables_fn |
| 388 | # since we want to reuse the memory recorded on the block tables |
| 389 | self.prefill_bb.clear(free_block_tables_fn=None) |
| 390 | # NOTE(review): cache_manager.free_block_table is handed to pop_finished, presumably as the callback that frees each finished sequence's block table (confirm); the second return value is discarded |
| 391 | finished_seqs, _ = self.running_bb.pop_finished(self.cache_manager.free_block_table) |
| 392 | for seq in finished_seqs: |
| 393 | self.running_list.remove(seq) |
| 394 | self.done_list.extend(finished_seqs) |
| 395 | |
| 396 | return finished_seqs |
| 397 | |
| 398 | def streamingllm_free_block_tables(self, updated_block_ids: List[int]): |
| 399 | """ |
no test coverage detected