MCPcopy
hub / github.com/Tele-AI/Telechat / main

Function main

quant/telechat_quantized_infer_demo.py:9–67  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

7
8
def main():
    """Run the quantized TeleChat inference demo end to end.

    Loads the tokenizer, the GPTQ-quantized model (on ``cuda:0``) and the
    generation config from ``PATH``, then demonstrates, in order: multi-turn
    chat, chat with a hand-written history, streaming chat, and plain
    continuation through ``model.generate``. Questions, answers and chat
    histories are printed to stdout.
    """
    # Model loading.
    tokenizer = AutoTokenizer.from_pretrained(PATH, trust_remote_code=True)
    model = TelechatGPTQForCausalLM.from_quantized(
        PATH,
        device="cuda:0",
        inject_fused_mlp=False,
        inject_fused_attention=False,
        trust_remote_code=True,
    )
    generate_config = GenerationConfig.from_pretrained(PATH)
    model.eval()

    # Keyword arguments common to every chat call below.
    chat_kwargs = {"tokenizer": tokenizer, "generation_config": generate_config}
    stars = "*" * 10
    history_label = "截至目前的聊天记录是:"

    # chat (bot) model: multi-turn demo.
    print(stars + "多轮输入演示" + stars)
    question = "你是谁?"
    print("提问:", question)
    answer, history = model.chat(question=question, history=[], stream=False, **chat_kwargs)
    print("回答:", answer)
    print(history_label, history)

    # Second turn: pass the history returned by the previous call back in.
    question = "你是谁训练的"
    print("提问:", question)
    answer, history = model.chat(question=question, history=history, stream=False, **chat_kwargs)
    print("回答是:", answer)
    print(history_label, history)

    # The history can also be written by hand and passed in like this.
    history = [
        {"role": "user", "content": "你是谁"},
        {"role": "bot", "content": "我是telechat"},
    ]

    question = "你是谁训练的"
    print("提问:", question)
    answer, history = model.chat(question=question, history=history, stream=False, **chat_kwargs)
    print("回答是:", answer)
    print(history_label, history)

    # chat (bot) model: streaming-return demo.
    print(stars + "流式输入演示" + stars)
    question = "你是谁?"
    print("提问:", question)
    stream = model.chat(question=question, history=[], stream=True, **chat_kwargs)
    # NOTE(review): both prints are taken to belong to the loop body; the
    # indentation of the second one should be confirmed against the repository.
    for answer, history in stream:
        print("回答是:", answer)
        print(history_label, history)

    # base model: direct continuation demo.
    inputs = "hello"
    print("输入:", inputs)
    encoded = tokenizer(inputs, return_tensors="pt").to(model.device)
    generated = model.generate(**encoded, generation_config=generate_config)
    output = tokenizer.decode(generated[0])

Calls 4

from_pretrained — Method · 0.80
from_quantized — Method · 0.80
eval — Method · 0.80
chat — Method · 0.45

Tested by

no test coverage detected