()
| 5 | |
| 6 | |
def test_simple():
    """Smoke-test basic encode/decode behaviour of the available encodings.

    Note: there are more actual tests than this one; they are just not
    currently public.
    """
    # Each row: (encoding name,
    #            expected ids for "hello world",
    #            expected ids for "hello <|endoftext|>" with specials allowed)
    known_cases = (
        ("gpt2", [31373, 995], [31373, 220, 50256]),
        ("cl100k_base", [15339, 1917], [15339, 220, 100257]),
    )
    for name, plain_ids, special_ids in known_cases:
        codec = tiktoken.get_encoding(name)
        assert codec.encode("hello world") == plain_ids
        assert codec.decode(plain_ids) == "hello world"
        assert codec.encode("hello <|endoftext|>", allowed_special="all") == special_ids

    # For every listed encoding, the first 10,000 token ids must survive a
    # decode-to-bytes / encode-single-token round trip unchanged.
    for name in tiktoken.list_encoding_names():
        codec = tiktoken.get_encoding(name)
        for token_id in range(10_000):
            raw = codec.decode_single_token_bytes(token_id)
            assert codec.encode_single_token(raw) == token_id
| 23 | |
| 24 | |
| 25 | def test_encoding_for_model(): |
nothing calls this directly
no test coverage detected
searching dependent graphs…