()
| 7 | |
| 8 | |
| 9 | def main(): |
| 10 | # Load everything from PATH: the tokenizer, the quantized model (placed on cuda:0, fused MLP/attention injection disabled) and the generation config; then switch the model to eval mode |
| 11 | tokenizer = AutoTokenizer.from_pretrained(PATH, trust_remote_code=True) |
| 12 | model = TelechatGPTQForCausalLM.from_quantized(PATH, device="cuda:0", inject_fused_mlp=False, |
| 13 | inject_fused_attention=False, trust_remote_code=True) |
| 14 | |
| 15 | generate_config = GenerationConfig.from_pretrained(PATH) |
| 16 | model.eval() |
| 17 | |
| 18 | # Chat (bot) model: multi-turn conversation demo. The first turn starts from an empty history; chat() is unpacked here as an (answer, history) pair |
| 19 | print("*" * 10 + "多轮输入演示" + "*" * 10) |
| 20 | question = "你是谁?" |
| 21 | print("提问:", question) |
| 22 | answer, history = model.chat(tokenizer=tokenizer, question=question, history=[], generation_config=generate_config, |
| 23 | stream=False) |
| 24 | print("回答:", answer) |
| 25 | print("截至目前的聊天记录是:", history) |
| 26 | |
| 27 | question = "你是谁训练的" |
| 28 | print("提问:", question) |
| 29 | # Second turn: pass in the history returned by the previous chat() call |
| 30 | answer, history = model.chat(tokenizer=tokenizer, question=question, history=history, |
| 31 | generation_config=generate_config, |
| 32 | stream=False) |
| 33 | print("回答是:", answer) |
| 34 | print("截至目前的聊天记录是:", history) |
| 35 | |
| 36 | # The history can also be supplied this way: built by hand as a list of {"role", "content"} dicts (roles used here: "user" and "bot") |
| 37 | history = [ |
| 38 | {"role": "user", "content": "你是谁"}, |
| 39 | {"role": "bot", "content": "我是telechat"}, |
| 40 | ] |
| 41 | |
| 42 | question = "你是谁训练的" |
| 43 | print("提问:", question) |
| 44 | answer, history = model.chat(tokenizer=tokenizer, question=question, history=history, |
| 45 | generation_config=generate_config, |
| 46 | stream=False) |
| 47 | print("回答是:", answer) |
| 48 | print("截至目前的聊天记录是:", history) |
| 49 | |
| 50 | # Chat (bot) model: streaming-response demo. With stream=True the result of chat() is iterated, and each item is unpacked as an (answer, history) pair |
| 51 | print("*" * 10 + "流式输入演示" + "*" * 10) |
| 52 | question = "你是谁?" |
| 53 | print("提问:", question) |
| 54 | gen = model.chat(tokenizer=tokenizer, question=question, history=[], generation_config=generate_config, |
| 55 | stream=True) |
| 56 | for answer, history in gen: |
| 57 | print("回答是:", answer) |
| 58 | print("截至目前的聊天记录是:", history) |
| 59 | |
| 60 | # Base model: plain text-continuation demo. The prompt is tokenized to PyTorch tensors, moved to the model's device, passed to generate(), and the first output sequence is decoded back to text |
| 61 | |
| 62 | inputs = "hello" |
| 63 | print("输入:", inputs) |
| 64 | output = model.generate(**tokenizer(inputs, return_tensors="pt").to(model.device), |
| 65 | generation_config=generate_config) |
| 66 | output = tokenizer.decode(output[0]) |
no test coverage detected