(tiktoken_bpe_file: str, expected_hash: str | None = None)
| 157 | |
| 158 | |
def load_tiktoken_bpe(tiktoken_bpe_file: str, expected_hash: str | None = None) -> dict[bytes, int]:
    """Parse a tiktoken BPE file into a mapping of token bytes to rank.

    The file contents are obtained via ``read_file_cached``. Every non-empty
    line must consist of exactly two whitespace-separated fields: a
    base64-encoded token followed by its integer rank.

    Args:
        tiktoken_bpe_file: Location of the BPE file; passed straight through
            to ``read_file_cached`` and echoed in error messages.
        expected_hash: Forwarded unchanged to ``read_file_cached``
            (presumably used there for an integrity check -- confirm against
            that helper).

    Returns:
        A dict keyed by the base64-decoded token, with the rank as ``int``.

    Raises:
        ValueError: If a non-empty line cannot be parsed (for example it does
            not have exactly two fields, or a field cannot be decoded or
            converted). The underlying exception is chained as the cause.
    """
    # NB: do not add caching to this function
    ranks: dict[bytes, int] = {}
    for line in read_file_cached(tiktoken_bpe_file, expected_hash).splitlines():
        if line:  # blank lines carry no entry and are ignored
            try:
                encoded_token, rank = line.split()
                ranks[base64.b64decode(encoded_token)] = int(rank)
            except Exception as err:
                # Re-raise with the offending line and file for easier debugging.
                raise ValueError(f"Error parsing line {line!r} in {tiktoken_bpe_file}") from err
    return ranks
no test coverage detected
searching dependent graphs…