MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / build_kosmos_engine

Function build_kosmos_engine

tensorrt_llm/tools/multimodal_builder.py:934–968  ·  view source on GitHub ↗
(args)

Source from the content-addressed store, hash-verified

932
933
def build_kosmos_engine(args):
    """Export the Kosmos vision path to ONNX and pass it to the engine builder.

    A dummy image is run through the model's processor to obtain a sample
    ``pixel_values`` tensor. That tensor is the example input handed to
    ``export_onnx``, and its dimensions 1..3 are forwarded to
    ``build_trt_engine`` as the input shape.

    Args:
        args: Namespace-like object. This function reads ``model_path``,
            ``device``, ``output_dir``, ``model_type`` and
            ``max_batch_size`` from it.
    """

    class VisionEncoderWrapper(torch.nn.Module):
        """Chains the encoder, its post-layernorm, L2-normalize and connector."""

        def __init__(self, encoder, connector):
            super().__init__()
            self.encoder = encoder
            self.connector = connector

        def forward(self, images):
            encoder_out = self.encoder(images, output_hidden_states=True)
            hidden = encoder_out.last_hidden_state
            normed = F.normalize(self.encoder.model.post_layernorm(hidden),
                                 dim=-1)
            # The connector returns a pair; only its first element is used.
            projected, _ = self.connector(normed)
            return projected

    # Build a sample input: the processor turns a dummy image into the
    # pixel tensor, which is then moved to the target device as float16.
    processor = AutoProcessor.from_pretrained(args.model_path)
    dummy_image = Image.new('RGB', [10, 10])
    processed = processor(text="dummy",
                          images=dummy_image,
                          return_tensors="pt")
    image = processed['pixel_values'].to(args.device, torch.float16)

    # Only the vision model and the image-to-text projection are exported.
    model = AutoModelForVision2Seq.from_pretrained(args.model_path,
                                                   dtype=torch.float16)
    vision_encoder = model.vision_model.to(args.device)
    projection = model.image_to_text_projection.to(args.device)
    wrapper = VisionEncoderWrapper(vision_encoder, projection)

    onnx_dir = f'{args.output_dir}/onnx'
    export_onnx(wrapper, image, onnx_dir)

    input_shape = [image.shape[dim] for dim in (1, 2, 3)]  # [3, H, W]
    build_trt_engine(args.model_type, input_shape, onnx_dir, args.output_dir,
                     args.max_batch_size)
969
970
971def build_phi_engine(args):

Callers 1

buildMethod · 0.85

Calls 5

export_onnxFunction · 0.85
build_trt_engineFunction · 0.85
from_pretrainedMethod · 0.45
toMethod · 0.45

Tested by

no test coverage detected