MCPcopy
hub / github.com/microsoft/Magma / get_qa_response

Function get_qa_response

agents/ui_agent/app.py:110–143  ·  view source on GitHub ↗
(instruction, image)

Source from the content-addressed store, hash-verified

@spaces.GPU
@torch.inference_mode()
def get_qa_response(instruction, image):
    """Generate the model's reply to ``instruction`` about ``image``.

    The instruction is inserted into ``magma_qa_prompt``, wrapped in a
    system + user chat conversation, rendered with the tokenizer's chat
    template and passed together with the image through ``magma_processor``.
    The reply is produced with greedy decoding, capped at 128 new tokens.

    Args:
        instruction: Value substituted into ``magma_qa_prompt`` via
            ``str.format``.
        image: Image handed to ``magma_processor`` as a one-element list.
            NOTE(review): presumably a PIL image -- confirm against the caller.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped. Only newly generated tokens are decoded, so the
        prompt text is never part of the result.
    """
    prompt = magma_qa_prompt.format(instruction)
    if magam_model.config.mm_use_image_start_end:
        # The model config asks for explicit start/end markers around the
        # image placeholder.
        prompt = prompt.replace('<image>', '<image_start><image><image_end>')

    convs = [
        {"role": "system", "content": "You are agent that can see, talk and act."},
        {"role": "user", "content": prompt},
    ]
    prompt = magma_processor.tokenizer.apply_chat_template(
        convs,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
    # Add a leading dimension to the image tensors.
    # NOTE(review): presumably the batch axis the model expects -- confirm.
    inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
    inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
    inputs = inputs.to(dtype).to(DEVICE)

    magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
    # The decorator already runs this function under torch.inference_mode(),
    # so no nested context manager is needed. ``temperature`` is omitted on
    # purpose: it is unused when do_sample=False (greedy decoding).
    output_ids = magam_model.generate(
        **inputs,
        do_sample=False,
        num_beams=1,
        max_new_tokens=128,
        use_cache=True,
    )

    # Drop the prompt by token position rather than by string replacement:
    # replacing the decoded prompt text would also delete any echo of it
    # inside the answer, and would silently fail if the two decodes differed.
    prompt_len = inputs['input_ids'].shape[-1]
    new_token_ids = output_ids[:, prompt_len:]
    response = magma_processor.batch_decode(new_token_ids, skip_special_tokens=True)[0]
    return response.strip()
144
145@spaces.GPU
146@torch.inference_mode()

Callers 1

processFunction · 0.85

Calls 1

batch_decodeMethod · 0.80

Tested by

no test coverage detected