Dump parameters to Tensor cache. Parameters ---------- params: Union[ Mapping[str, Union[np.ndarray, tvm.runtime.Tensor]], Iterator[Tuple[str, Union[np.ndarray, tvm.runtime.Tensor]]], ] The parameter dictionary or generator cache_dir: str The pat
(
params: Mapping[str, np.ndarray | tvm.runtime.Tensor]
| Iterator[tuple[str, np.ndarray | tvm.runtime.Tensor]],
cache_dir: str,
encode_format="f32-to-bf16",
meta_data=None,
shard_cap_mb=32,
show_progress: bool = True,
update_if_exists: bool = False,
)
| 200 | |
| 201 | |
| 202 | def dump_tensor_cache( |
| 203 | params: Mapping[str, np.ndarray | tvm.runtime.Tensor] |
| 204 | | Iterator[tuple[str, np.ndarray | tvm.runtime.Tensor]], |
| 205 | cache_dir: str, |
| 206 | encode_format="f32-to-bf16", |
| 207 | meta_data=None, |
| 208 | shard_cap_mb=32, |
| 209 | show_progress: bool = True, |
| 210 | update_if_exists: bool = False, |
| 211 | ): |
| 212 | """Dump parameters to Tensor cache. |
| 213 | |
| 214 | Parameters |
| 215 | ---------- |
| 216 | params: Union[ |
| 217 | Mapping[str, Union[np.ndarray, tvm.runtime.Tensor]], |
| 218 | Iterator[Tuple[str, Union[np.ndarray, tvm.runtime.Tensor]]], |
| 219 | ] |
| 220 | The parameter dictionary or generator |
| 221 | |
| 222 | cache_dir: str |
| 223 | The path to the cache |
| 224 | |
| 225 | encode_format: {"f32-to-bf16", "raw"} |
| 226 | Encoding format. |
| 227 | |
| 228 | meta_data: json-compatible-struct or Callable[[], Any] |
| 229 | Extra meta_data to be stored in the cache json file, |
| 230 | or a callable that returns the metadata. |
| 231 | |
| 232 | shard_cap_mb: int |
| 233 | Maximum number of MB to be kept per shard |
| 234 | |
| 235 | show_progress: bool |
| 236 | A boolean indicating whether to show the dump progress. |
| 237 | |
| 238 | update_if_exists: bool |
| 239 | If the cache already exists, update the cache. When set to False, it will overwrite the |
| 240 | existing files. |
| 241 | """ |
| 242 | if encode_format not in ("raw", "f32-to-bf16"): |
| 243 | raise ValueError(f"Invalie encode_format {encode_format}") |
| 244 | |
| 245 | records = [] |
| 246 | from_generator = isinstance(params, GeneratorType) |
| 247 | total_bytes = 0 |
| 248 | counter = 0 |
| 249 | max_out_length = 0 |
| 250 | |
| 251 | if not os.path.exists(cache_dir): |
| 252 | os.makedirs(cache_dir) |
| 253 | |
| 254 | f32_to_bf16_triggered = False |
| 255 | |
| 256 | print(f"Start storing to cache {cache_dir}") |
| 257 | shard_cap_nbytes = shard_cap_mb * (1 << 20) |
| 258 | |
| 259 | nd_cache_json = os.path.join(cache_dir, "tensor-cache.json") |
nothing calls this directly
no test coverage detected
searching dependent graphs…