MCPcopy
hub / github.com/MiniMax-AI/MiniMax-01 / main

Function main

inference/minimax-vl-01.py:48–117  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

46
47@torch.no_grad()
48def main():
49 args = parse_args()
50 print("\n=============== Argument ===============")
51 for key in vars(args):
52 print(f"{key}: {vars(args)[key]}")
53 print("========================================")
54
55 model_id = args.model_id
56
57 hf_config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
58 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
59 quantization_config = generate_quanto_config(hf_config, args.quant_type)
60
61 check_params(args, hf_config)
62
63 model_safetensors_index_path = os.path.join(model_id, "model.safetensors.index.json")
64 with open(model_safetensors_index_path, "r") as f:
65 model_safetensors_index = json.load(f)
66 weight_map = model_safetensors_index['weight_map']
67 vision_map = {}
68 for key, value in weight_map.items():
69 if 'vision_tower' in key or 'image_newline' in key or 'multi_modal_projector' in key:
70 new_key = key.replace('.weight','').replace('.bias','')
71 if new_key not in vision_map:
72 vision_map[new_key] = value
73 device_map = {
74 'language_model.model.embed_tokens': 'cuda:0',
75 'language_model.model.norm': f'cuda:{args.world_size - 1}',
76 'language_model.lm_head': f'cuda:{args.world_size - 1}'
77 }
78 for key, value in vision_map.items():
79 device_map[key] = f'cuda:0'
80 device_map['vision_tower.vision_model.post_layernorm'] = f'cuda:0'
81 layers_per_device = hf_config.text_config.num_hidden_layers // args.world_size
82 for i in range(args.world_size):
83 for j in range(layers_per_device):
84 device_map[f'language_model.model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'
85
86 messages = [
87 {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant created by Minimax based on MiniMax-VL-01 model."}]},
88 {"role": "user", "content": [{"type": "image", "image": "placeholder"},{"type": "text", "text": "Describe this image."}]},
89 ]
90 prompt = processor.tokenizer.apply_chat_template(
91 messages, tokenize=False, add_generation_prompt=True
92 )
93 print(f"prompt: \n{prompt}")
94 raw_image = Image.open(args.image_path)
95 model_inputs = processor(images=[raw_image], text=prompt, return_tensors='pt').to('cuda').to(torch.bfloat16)
96
97 quantized_model = AutoModelForCausalLM.from_pretrained(
98 model_id,
99 torch_dtype="bfloat16",
100 device_map=device_map,
101 quantization_config=quantization_config,
102 trust_remote_code=True,
103 offload_buffers=True,
104 )
105 generation_config = GenerationConfig(

Callers 1

minimax-vl-01.py — File · 0.70

Calls 3

parse_args — Function · 0.70
generate_quanto_config — Function · 0.70
check_params — Function · 0.70

Tested by

no test coverage detected