MCPcopy
hub / github.com/InternLM/lmdeploy / test_tokenizer

Function test_tokenizer

tests/test_lmdeploy/test_tokenizer.py:17–31  ·  view source on GitHub ↗
(model_path, input, interval, add_special_tokens, skip_special_tokens)

Source from the content-addressed store, hash-verified

15@pytest.mark.parametrize('add_special_tokens', [True, False])
16@pytest.mark.parametrize('skip_special_tokens', [True, False])
def test_tokenizer(model_path, input, interval, add_special_tokens, skip_special_tokens):
    """Check that chunked incremental detokenization matches a one-shot decode.

    The text is encoded once, decoded in a single call to obtain the reference
    string, and then detokenized incrementally over growing prefixes of the
    token ids. The concatenated incremental output must equal the reference.

    Args:
        model_path: Passed to ``Tokenizer`` to load the tokenizer under test.
        input: Text to encode.
        interval: Number of token ids added to the prefix per step.
        add_special_tokens: Forwarded to ``encode``.
        skip_special_tokens: Forwarded to both ``decode`` and
            ``detokenize_incrementally``.
    """
    tokenizer = Tokenizer(model_path, trust_remote_code=True).model
    token_ids = tokenizer.encode(input, False, add_special_tokens=add_special_tokens)
    # Reference result: decode the whole sequence in one call.
    expected = tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
    total = len(token_ids)
    state = DetokenizeState()
    pieces = []
    for start in range(0, total, interval):
        end = start + interval
        # lmdeploy may decode nothing when concurrency is high; imitate that by
        # occasionally rolling the prefix end back to this chunk's start, so the
        # call receives no tokens from the current chunk. The final chunk
        # (end >= total) is never rolled back, so every token is eventually fed.
        if end < total and random.randint(1, 10) < 4:
            end = start
        piece, state = tokenizer.detokenize_incrementally(token_ids[:end], state, skip_special_tokens)
        pieces.append(piece)
    output = ''.join(pieces)
    assert expected == output, 'input string should equal to output after enc-dec'
32
33
34@pytest.mark.parametrize('model_path', [

Callers

nothing calls this directly

Calls 5

TokenizerClass · 0.90
DetokenizeStateClass · 0.90
encodeMethod · 0.45
decodeMethod · 0.45

Tested by

no test coverage detected