hub / github.com/InternLM/lmdeploy / main_loop

Method main_loop

lmdeploy/pytorch/engine/engine_loop.py:405–450 · view source on GitHub ↗

Main loop of the engine. Each engine instance communicates with the engine through a queue.

(self)

Source from the content-addressed store, hash-verified

403	return forward_inputs, next_running
404
async def main_loop(self):
    """Run the engine's main loop until ``self.stop_event`` is set.

    Each engine instance would communicate with the engine by queue.

    On every pass the loop does one of three things:

    * parks on ``self._sleep_resume_event`` while a sleep is requested,
    * asks ``_main_loop_try_send_next_inputs`` for a batch when none is
      carried over from the previous pass, or
    * activates the carried batch and awaits ``_main_loop_get_outputs``,
      whose returned pair is carried into the next pass.
    """
    runnable_event = self.has_runable_event
    scheduler = self.scheduler
    pending_inputs = pending_running = None

    async def _warn_and_back_off():
        # TODO (JimyMa): add watermark check event instead of async sleep.
        # self.perfill_watermark_event.wait()
        logger.warning(f'no next prefill running request, Maybe cache is full, '
                       f'free gpu cache blocks: {scheduler.block_manager.get_num_free_gpu_blocks()}, '
                       f'total gpu cache blocks: {scheduler.block_manager.num_gpu_blocks}')
        await asyncio.sleep(0.1)

    while not self.stop_event.is_set():
        if self._sleep_requested:
            # Anything prefetched before the sleep is stale: sleep ends
            # scheduler sessions and releases KV cache, so the saved next
            # batch must not survive the drain point.
            pending_inputs = pending_running = None
            # Signal that no new forward input will be scheduled until
            # wakeup resumes this loop, then wait for that wakeup.
            self._main_sleep_drain_event.set()
            await self._sleep_resume_event.wait()
            continue

        if pending_running is None:
            pending_inputs, pending_running = await self._main_loop_try_send_next_inputs()
            if pending_running is None:
                # Nothing to run. If a sleep request arrived in the
                # meantime, go straight back to the top so it is handled;
                # otherwise log and back off before retrying.
                if not self._sleep_requested:
                    await _warn_and_back_off()
                continue

        scheduler.activate_seqs(pending_running)
        pending_inputs, pending_running = await self._main_loop_get_outputs(
            running=pending_running,
            forward_inputs=pending_inputs,
        )
        self.inputs_maker.deactivate_evict_seqs()
        runnable_event.set()
451
452	def update_running_migration(self, running: 'SeqList', next_token_ids: np.ndarray, stopped: torch.Tensor,
453	model_metas: list[dict[str, Any]]):

Callers 1

startMethod · 0.95

Calls 6

_main_loop_try_send_next_inputsMethod · 0.95

_main_loop_get_outputsMethod · 0.95

activate_seqsMethod · 0.80

deactivate_evict_seqsMethod · 0.80

setMethod · 0.45

waitMethod · 0.45

Tested by

no test coverage detected