MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / make_context

Function make_context

tensorrt_llm/models/qwen/utils.py:17–85  ·  view source on GitHub ↗
(
    tokenizer,
    query,
    history,
    system,
    max_input_length,
    max_window_size: int = 6144,
    chat_format: str = "chatml",
)

Source from the content-addressed store, hash-verified

15
16
17 def make_context(
18 tokenizer,
19 query,
20 history,
21 system,
22 max_input_length,
23 max_window_size: int = 6144,
24 chat_format: str = "chatml",
25):
26 if history is None:
27 history = []
28
29 if chat_format == "chatml":
30 im_start, im_end = "<|im_start|>", "<|im_end|>"
31 im_start_tokens = [tokenizer.im_start_id]
32 im_end_tokens = [tokenizer.im_end_id]
33 nl_tokens = tokenizer.encode("\n")
34
35 def _tokenize_str(role, content):
36 return (f"{role}\n{content}",
37 tokenizer.encode(
38 role,
39 allowed_special=set(),
40 ) + nl_tokens + tokenizer.encode(
41 content,
42 allowed_special=set(),
43 ))
44
45 system_text, system_tokens_part = _tokenize_str("system", system)
46 system_tokens = im_start_tokens + system_tokens_part + im_end_tokens
47 raw_text = ""
48 context_tokens = []
49
50 for turn_query, turn_response in reversed(history):
51 query_text, query_tokens_part = _tokenize_str("user", turn_query)
52 query_tokens = im_start_tokens + query_tokens_part + im_end_tokens
53
54 response_text, response_tokens_part = _tokenize_str(
55 "assistant", turn_response)
56 response_tokens = im_start_tokens + response_tokens_part + im_end_tokens
57 next_context_tokens = nl_tokens + query_tokens + nl_tokens + response_tokens
58 prev_chat = (
59 f"\n{im_start}{query_text}{im_end}\n{im_start}{response_text}{im_end}"
60 )
61
62 current_context_size = (len(system_tokens) +
63 len(next_context_tokens) +
64 len(context_tokens))
65 if current_context_size < max_window_size:
66 context_tokens = next_context_tokens + context_tokens
67 raw_text = prev_chat + raw_text
68 else:
69 break
70
71 context_tokens = system_tokens + context_tokens
72 raw_text = f"{im_start}{system_text}{im_end}" + raw_text
73 context_tokens += (nl_tokens + im_start_tokens +
74 _tokenize_str("user", query)[1] + im_end_tokens +

Callers 2

_prepare_inputs — Function · 0.90
capture_activation_range — Function · 0.85

Calls 2

_tokenize_str — Function · 0.85
encode — Method · 0.45

Tested by

no test coverage detected