hub / github.com/MiniMax-AI/MiniMax-01 / main

Function main

inference/minimax-vl-01.py:48–117 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

46
47	@torch.no_grad()
48	def main():
49	args = parse_args()
50	print("\n=============== Argument ===============")
51	for key in vars(args):
52	print(f"{key}: {vars(args)[key]}")
53	print("========================================")
54
55	model_id = args.model_id
56
57	hf_config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
58	processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
59	quantization_config = generate_quanto_config(hf_config, args.quant_type)
60
61	check_params(args, hf_config)
62
63	model_safetensors_index_path = os.path.join(model_id, "model.safetensors.index.json")
64	with open(model_safetensors_index_path, "r") as f:
65	model_safetensors_index = json.load(f)
66	weight_map = model_safetensors_index['weight_map']
67	vision_map = {}
68	for key, value in weight_map.items():
69	if 'vision_tower' in key or 'image_newline' in key or 'multi_modal_projector' in key:
70	new_key = key.replace('.weight','').replace('.bias','')
71	if new_key not in vision_map:
72	vision_map[new_key] = value
73	device_map = {
74	'language_model.model.embed_tokens': 'cuda:0',
75	'language_model.model.norm': f'cuda:{args.world_size - 1}',
76	'language_model.lm_head': f'cuda:{args.world_size - 1}'
77	}
78	for key, value in vision_map.items():
79	device_map[key] = f'cuda:0'
80	device_map['vision_tower.vision_model.post_layernorm'] = f'cuda:0'
81	layers_per_device = hf_config.text_config.num_hidden_layers // args.world_size
82	for i in range(args.world_size):
83	for j in range(layers_per_device):
84	device_map[f'language_model.model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'
85
86	messages = [
87	{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant created by Minimax based on MiniMax-VL-01 model."}]},
88	{"role": "user", "content": [{"type": "image", "image": "placeholder"},{"type": "text", "text": "Describe this image."}]},
89	]
90	prompt = processor.tokenizer.apply_chat_template(
91	messages, tokenize=False, add_generation_prompt=True
92	)
93	print(f"prompt: \n{prompt}")
94	raw_image = Image.open(args.image_path)
95	model_inputs = processor(images=[raw_image], text=prompt, return_tensors='pt').to('cuda').to(torch.bfloat16)
96
97	quantized_model = AutoModelForCausalLM.from_pretrained(
98	model_id,
99	torch_dtype="bfloat16",
100	device_map=device_map,
101	quantization_config=quantization_config,
102	trust_remote_code=True,
103	offload_buffers=True,
104	)
105	generation_config = GenerationConfig(

Callers 1

minimax-vl-01.py — File · 0.70

Calls 3

parse_args — Function · 0.70

generate_quanto_config — Function · 0.70

check_params — Function · 0.70

Tested by

no test coverage detected