Class CacheConfig

lmdeploy/pytorch/config.py:90–120 · view source on GitHub ↗

Config of key value cache.

Source from the content-addressed store, hash-verified

88
@dataclass
class CacheConfig:
    """Settings for the key/value cache.

    After construction, ``__post_init__`` adjusts two dependent options:
    prefix caching is switched off when ``window_size`` is greater than 1,
    and a ``kernel_block_size`` of ``-1`` is replaced by ``block_size``.
    """

    # Required settings (no defaults).
    max_batches: int
    block_size: int
    num_cpu_blocks: int
    num_gpu_blocks: int

    # Optional settings. ``-1`` for kernel_block_size means "same as
    # block_size"; it is resolved in __post_init__.
    kernel_block_size: int = -1
    window_size: int = -1
    cache_max_entry_count: float = 0.8
    max_prefill_token_num: int = 8192
    enable_prefix_caching: bool = False
    quant_policy: QuantPolicy = QuantPolicy.NONE
    device_type: str = 'cuda'
    num_state_caches: int = None
    states_shapes: list[tuple] = field(default_factory=list)

    # Blocks set aside for dummy inputs; starts at 0 so unit tests work.
    num_reserved_gpu_blocks: int = 0

    # Options used for PD disaggregation.
    role: EngineRole = EngineRole.Hybrid
    migration_backend: MigrationBackend = MigrationBackend.DLSlime

    def __post_init__(self):
        """Resolve options that depend on other fields."""
        has_window = self.window_size > 1
        if has_window and self.enable_prefix_caching:
            # The two features cannot be combined: warn and turn caching off.
            logger.warning('Prefix caching is not available for window attention.')
            self.enable_prefix_caching = False

        kernel_size_unset = self.kernel_block_size == -1
        if kernel_size_unset:
            self.kernel_block_size = self.block_size
121
122
123	class TPMode(enum.Enum):

build_cache_config · Method · 0.90

test_allocate_caches_requires_block_size_divisible_by_kernel_block_size · Function · 0.90

test_pd_migration_rejects_split_kernel_blocks · Function · 0.90

test_sync_spec_cache_block_size_updates_kernel_block_size · Function · 0.90

test_update_num_gpu_blocks_can_be_limited_by_non_spec_rank · Function · 0.90

cache_config · Method · 0.90

test_reserved_state_cache_is_not_allocatable · Function · 0.90

test_non_ssm_state_manager_without_state_caches · Function · 0.90

test_fp8_quant_cache_descs_are_empty · Function · 0.90

no outgoing calls

test_allocate_caches_requires_block_size_divisible_by_kernel_block_size · Function · 0.72

test_pd_migration_rejects_split_kernel_blocks · Function · 0.72

test_sync_spec_cache_block_size_updates_kernel_block_size · Function · 0.72

test_update_num_gpu_blocks_can_be_limited_by_non_spec_rank · Function · 0.72

cache_config · Method · 0.72

test_reserved_state_cache_is_not_allocatable · Function · 0.72

test_non_ssm_state_manager_without_state_caches · Function · 0.72

test_fp8_quant_cache_descs_are_empty · Function · 0.72