MCPcopy
hub / github.com/microsoft/Magma / get_som_response

Function get_som_response

agents/ui_agent/app.py:73–106  ·  view source on GitHub ↗
(instruction, image_som)

Source from the content-addressed store, hash-verified

@spaces.GPU
@torch.inference_mode()
def get_som_response(instruction, image_som):
    """Generate the model's text reply for an instruction about an image.

    The instruction is inserted into ``magma_som_prompt``, wrapped in a
    two-turn chat (fixed system message + user message), rendered with the
    tokenizer's chat template and sent to the model together with
    ``image_som``. Decoding is greedy (no sampling, single beam) and capped
    at 128 new tokens.

    Args:
        instruction: Value substituted into ``magma_som_prompt`` via
            ``str.format``.
        image_som: Image handed to ``magma_processor`` as the only entry of
            ``images``. NOTE(review): presumably the screenshot with
            set-of-mark annotations drawn on it -- confirm against caller.

    Returns:
        The decoded newly generated text, with special tokens skipped and
        surrounding whitespace stripped.

    Side effects:
        Sets ``magam_model.generation_config.pad_token_id`` to the
        tokenizer's pad token id on every call.
    """
    user_text = magma_som_prompt.format(instruction)
    if magam_model.config.mm_use_image_start_end:
        # This model configuration expects the image placeholder to be
        # wrapped in explicit start/end markers.
        user_text = user_text.replace('<image>', '<image_start><image><image_end>')

    convs = [
        {"role": "system", "content": "You are agent that can see, talk and act."},
        {"role": "user", "content": user_text},
    ]
    chat_prompt = magma_processor.tokenizer.apply_chat_template(
        convs,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = magma_processor(images=[image_som], texts=chat_prompt, return_tensors="pt")
    # Add a leading dimension to the image tensors before they are passed to
    # generate(). NOTE(review): presumably the batch axis the model expects.
    inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
    inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
    inputs = inputs.to(dtype).to(DEVICE)

    magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
    # No inner inference_mode() context: the decorator already covers the
    # whole function body.
    output_ids = magam_model.generate(
        **inputs,
        temperature=0.0,
        do_sample=False,
        num_beams=1,
        max_new_tokens=128,
        use_cache=True,
    )

    # Drop the prompt by token position instead of by string replacement.
    # The returned sequence is expected to start with the prompt ids, so
    # slicing at the prompt length keeps only the new tokens. This does not
    # depend on the decoded prompt re-appearing byte-for-byte in the decoded
    # output, and it cannot delete text the model legitimately generated.
    prompt_len = inputs['input_ids'].shape[-1]
    new_token_ids = output_ids[:, prompt_len:]
    response = magma_processor.batch_decode(new_token_ids, skip_special_tokens=True)[0]
    return response.strip()
107
108@spaces.GPU
109@torch.inference_mode()

Callers 1

processFunction · 0.85

Calls 1

batch_decodeMethod · 0.80

Tested by

no test coverage detected