MCPcopy
hub / github.com/InternLM/lmdeploy / CacheConfig

Class CacheConfig

lmdeploy/pytorch/config.py:90–120  ·  view source on GitHub ↗

Configuration of the key-value (KV) cache.

Source from the content-addressed store, hash-verified

88
@dataclass
class CacheConfig:
    """Configuration of the key-value cache.

    Once the dataclass has been built, ``__post_init__`` reconciles two
    fields: ``enable_prefix_caching`` is switched off (with a warning) when
    ``window_size`` is greater than 1, and a ``kernel_block_size`` left at
    ``-1`` is replaced by ``block_size``.
    """

    max_batches: int
    block_size: int
    num_cpu_blocks: int
    num_gpu_blocks: int
    # -1 is a placeholder; __post_init__ substitutes block_size for it.
    kernel_block_size: int = -1
    # A value greater than 1 makes __post_init__ disable prefix caching.
    window_size: int = -1
    cache_max_entry_count: float = 0.8
    max_prefill_token_num: int = 8192
    enable_prefix_caching: bool = False
    quant_policy: QuantPolicy = QuantPolicy.NONE
    device_type: str = 'cuda'
    num_state_caches: int = None
    # default_factory gives every instance its own list object.
    states_shapes: list[tuple] = field(default_factory=list)

    # reserved blocks for dummy inputs, init to 0 for unit test.
    num_reserved_gpu_blocks: int = 0

    # For PD Disaggregation
    role: EngineRole = EngineRole.Hybrid
    migration_backend: MigrationBackend = MigrationBackend.DLSlime

    def __post_init__(self):
        """Reconcile interdependent fields after dataclass construction."""
        has_window_attention = self.window_size > 1
        if has_window_attention and self.enable_prefix_caching:
            # The two options cannot be combined: warn and turn caching off.
            logger.warning('Prefix caching is not available for window attention.')
            self.enable_prefix_caching = False

        kernel_block_size_unset = self.kernel_block_size == -1
        if kernel_block_size_unset:
            # No explicit kernel block size given: reuse block_size.
            self.kernel_block_size = self.block_size
121
122
123class TPMode(enum.Enum):

Calls

no outgoing calls