Function main

fastchat/serve/cli.py:192–260 · view source on GitHub ↗

(args)

Source from the content-addressed store, hash-verified

190
191
192	def main(args):
193	if args.gpus:
194	if len(args.gpus.split(",")) < args.num_gpus:
195	raise ValueError(
196	f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
197	)
198	os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
199	os.environ["XPU_VISIBLE_DEVICES"] = args.gpus
200	if args.enable_exllama:
201	exllama_config = ExllamaConfig(
202	max_seq_len=args.exllama_max_seq_len,
203	gpu_split=args.exllama_gpu_split,
204	cache_8bit=args.exllama_cache_8bit,
205	)
206	else:
207	exllama_config = None
208	if args.enable_xft:
209	xft_config = XftConfig(
210	max_seq_len=args.xft_max_seq_len,
211	data_type=args.xft_dtype,
212	)
213	if args.device != "cpu":
214	print("xFasterTransformer now is only support CPUs. Reset device to CPU")
215	args.device = "cpu"
216	else:
217	xft_config = None
218	if args.style == "simple":
219	chatio = SimpleChatIO(args.multiline)
220	elif args.style == "rich":
221	chatio = RichChatIO(args.multiline, args.mouse)
222	elif args.style == "programmatic":
223	chatio = ProgrammaticChatIO()
224	else:
225	raise ValueError(f"Invalid style for console: {args.style}")
226	try:
227	chat_loop(
228	args.model_path,
229	args.device,
230	args.num_gpus,
231	args.max_gpu_memory,
232	str_to_torch_dtype(args.dtype),
233	args.load_8bit,
234	args.cpu_offloading,
235	args.conv_template,
236	args.conv_system_msg,
237	args.temperature,
238	args.repetition_penalty,
239	args.max_new_tokens,
240	chatio,
241	gptq_config=GptqConfig(
242	ckpt=args.gptq_ckpt or args.model_path,
243	wbits=args.gptq_wbits,
244	groupsize=args.gptq_groupsize,
245	act_order=args.gptq_act_order,
246	),
247	awq_config=AWQConfig(
248	ckpt=args.awq_ckpt or args.model_path,
249	wbits=args.awq_wbits,

Callers 1

cli.pyFile · 0.70

Calls 9

ExllamaConfigClass · 0.90

XftConfigClass · 0.90

chat_loopFunction · 0.90

str_to_torch_dtypeFunction · 0.90

GptqConfigClass · 0.90

AWQConfigClass · 0.90

SimpleChatIOClass · 0.85

RichChatIOClass · 0.85

ProgrammaticChatIOClass · 0.85

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…